accelforge-0.0.1-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (258)
  1. accelforge/__init__.py +21 -0
  2. accelforge/_accelerated_imports.py +16 -0
  3. accelforge/_deprecate/_simanneal/evalmapping.py +271 -0
  4. accelforge/_deprecate/_simanneal/mapspaceglobals.py +298 -0
  5. accelforge/_deprecate/_simanneal/simanneal.py +666 -0
  6. accelforge/_deprecate/_simanneal/tracking.py +105 -0
  7. accelforge/_deprecate/_simanneal/wrappers.py +218 -0
  8. accelforge/_deprecate/_simanneal2/__init__.py +7 -0
  9. accelforge/_deprecate/_simanneal2/simanneal.py +493 -0
  10. accelforge/_deprecate/_simanneal2/tracking.py +116 -0
  11. accelforge/_deprecate/compatibility_util.py +181 -0
  12. accelforge/_deprecate/layerdeduplication/__init__.py +2 -0
  13. accelforge/_deprecate/layerdeduplication/group_similar_einsums.py +160 -0
  14. accelforge/_deprecate/layerdeduplication/grouped_einsums.py +84 -0
  15. accelforge/_deprecate/mapping_filter_tags/__init__.py +2 -0
  16. accelforge/_deprecate/mapping_filter_tags/ffmt.py +212 -0
  17. accelforge/_deprecate/mapping_filter_tags/onesplit.py +24 -0
  18. accelforge/_deprecate/mapping_filter_tags/util.py +24 -0
  19. accelforge/_deprecate/tags.py +69 -0
  20. accelforge/_deprecate/viz/__init__.py +0 -0
  21. accelforge/_deprecate/viz/interactive.py +159 -0
  22. accelforge/_deprecate/viz/reservationtree.py +307 -0
  23. accelforge/_deprecate/viz/ski_slope.py +88 -0
  24. accelforge/_version.py +15 -0
  25. accelforge/examples.py +39 -0
  26. accelforge/frontend/__init__.py +10 -0
  27. accelforge/frontend/_binding.py +129 -0
  28. accelforge/frontend/_workload_isl/__init__.py +2 -0
  29. accelforge/frontend/_workload_isl/_isl.py +149 -0
  30. accelforge/frontend/_workload_isl/_symbolic.py +141 -0
  31. accelforge/frontend/arch copy.py +1544 -0
  32. accelforge/frontend/arch.py +1642 -0
  33. accelforge/frontend/config.py +63 -0
  34. accelforge/frontend/mapper/__init__.py +5 -0
  35. accelforge/frontend/mapper/ffm.py +126 -0
  36. accelforge/frontend/mapper/mapper.py +7 -0
  37. accelforge/frontend/mapper/metrics.py +30 -0
  38. accelforge/frontend/mapping/__init__.py +1 -0
  39. accelforge/frontend/mapping/mapping.py +1736 -0
  40. accelforge/frontend/model.py +14 -0
  41. accelforge/frontend/renames.py +150 -0
  42. accelforge/frontend/spec copy.py +230 -0
  43. accelforge/frontend/spec.py +301 -0
  44. accelforge/frontend/variables.py +12 -0
  45. accelforge/frontend/workload.py +952 -0
  46. accelforge/mapper/FFM/__init__.py +9 -0
  47. accelforge/mapper/FFM/_join_pmappings/__init__.py +0 -0
  48. accelforge/mapper/FFM/_join_pmappings/compatibility.py +653 -0
  49. accelforge/mapper/FFM/_join_pmappings/compress_pmappings.py +140 -0
  50. accelforge/mapper/FFM/_join_pmappings/join_pmappings.py +703 -0
  51. accelforge/mapper/FFM/_join_pmappings/pmapping_dataframe.py +901 -0
  52. accelforge/mapper/FFM/_join_pmappings/pmapping_group.py +337 -0
  53. accelforge/mapper/FFM/_make_pmappings/contraints/__init__.py +0 -0
  54. accelforge/mapper/FFM/_make_pmappings/contraints/constraints.py +360 -0
  55. accelforge/mapper/FFM/_make_pmappings/make_pmapping_templates/__init__.py +1 -0
  56. accelforge/mapper/FFM/_make_pmappings/make_pmapping_templates/make_loops.py +373 -0
  57. accelforge/mapper/FFM/_make_pmappings/make_pmapping_templates/make_pmapping_templates.py +463 -0
  58. accelforge/mapper/FFM/_make_pmappings/make_pmapping_templates/make_reservations.py +95 -0
  59. accelforge/mapper/FFM/_make_pmappings/make_pmapping_templates/make_storage_order.py +382 -0
  60. accelforge/mapper/FFM/_make_pmappings/make_pmapping_templates/make_storages.py +155 -0
  61. accelforge/mapper/FFM/_make_pmappings/make_pmappings.py +411 -0
  62. accelforge/mapper/FFM/_make_pmappings/make_pmappings_from_templates/__init__.py +1 -0
  63. accelforge/mapper/FFM/_make_pmappings/make_pmappings_from_templates/make_pmappings_from_templates.py +407 -0
  64. accelforge/mapper/FFM/_make_pmappings/make_pmappings_from_templates/make_tile_shapes.py +1681 -0
  65. accelforge/mapper/FFM/_make_pmappings/make_pmappings_from_templates/run_model.py +170 -0
  66. accelforge/mapper/FFM/_make_pmappings/make_pmappings_from_templates/symbol_relations.py +174 -0
  67. accelforge/mapper/FFM/_make_pmappings/pmapper_job.py +282 -0
  68. accelforge/mapper/FFM/_pareto_df/df_convention.py +273 -0
  69. accelforge/mapper/FFM/_pareto_df/pareto copy.py +836 -0
  70. accelforge/mapper/FFM/_pareto_df/pareto.py +508 -0
  71. accelforge/mapper/FFM/data.py +61 -0
  72. accelforge/mapper/FFM/main copy.py +236 -0
  73. accelforge/mapper/FFM/main.py +208 -0
  74. accelforge/mapper/FFM/mappings.py +510 -0
  75. accelforge/mapper/FFM/pmappings.py +310 -0
  76. accelforge/mapper/__init__.py +4 -0
  77. accelforge/mapper.py +0 -0
  78. accelforge/model/__init__.py +1 -0
  79. accelforge/model/_looptree/__init__.py +0 -0
  80. accelforge/model/_looptree/accesses.py +335 -0
  81. accelforge/model/_looptree/capacity/__init__.py +1 -0
  82. accelforge/model/_looptree/capacity/aggregators.py +36 -0
  83. accelforge/model/_looptree/capacity/capacity.py +47 -0
  84. accelforge/model/_looptree/energy.py +150 -0
  85. accelforge/model/_looptree/equivalent_ranks.py +29 -0
  86. accelforge/model/_looptree/latency/__init__.py +1 -0
  87. accelforge/model/_looptree/latency/latency.py +98 -0
  88. accelforge/model/_looptree/latency/memory.py +120 -0
  89. accelforge/model/_looptree/latency/processors.py +92 -0
  90. accelforge/model/_looptree/mapping_utilities.py +71 -0
  91. accelforge/model/_looptree/reuse/__init__.py +4 -0
  92. accelforge/model/_looptree/reuse/isl/__init__.py +1 -0
  93. accelforge/model/_looptree/reuse/isl/des.py +59 -0
  94. accelforge/model/_looptree/reuse/isl/isl_functions.py +374 -0
  95. accelforge/model/_looptree/reuse/isl/mapping_to_isl/__init__.py +4 -0
  96. accelforge/model/_looptree/reuse/isl/mapping_to_isl/analyze_mapping.py +297 -0
  97. accelforge/model/_looptree/reuse/isl/mapping_to_isl/skews_from_mapping.py +236 -0
  98. accelforge/model/_looptree/reuse/isl/mapping_to_isl/tiling.py +685 -0
  99. accelforge/model/_looptree/reuse/isl/mapping_to_isl/types.py +188 -0
  100. accelforge/model/_looptree/reuse/isl/spatial.py +260 -0
  101. accelforge/model/_looptree/reuse/isl/temporal.py +182 -0
  102. accelforge/model/_looptree/reuse/symbolic/__init__.py +1 -0
  103. accelforge/model/_looptree/reuse/symbolic/symbolic copy 2.py +1346 -0
  104. accelforge/model/_looptree/reuse/symbolic/symbolic copy.py +1408 -0
  105. accelforge/model/_looptree/reuse/symbolic/symbolic.py +1396 -0
  106. accelforge/model/_looptree/run.py +122 -0
  107. accelforge/model/_looptree/types.py +26 -0
  108. accelforge/model/_looptree/visualization/__init__.py +0 -0
  109. accelforge/model/_looptree/visualization/occupancy.py +11 -0
  110. accelforge/model/main.py +222 -0
  111. accelforge/plotting/__init__.py +2 -0
  112. accelforge/plotting/mappings.py +219 -0
  113. accelforge/plotting/specs.py +57 -0
  114. accelforge/util/__init__.py +4 -0
  115. accelforge/util/_base_analysis_types.py +24 -0
  116. accelforge/util/_basetypes.py +1089 -0
  117. accelforge/util/_frozenset.py +36 -0
  118. accelforge/util/_isl.py +29 -0
  119. accelforge/util/_itertools.py +14 -0
  120. accelforge/util/_mathfuncs.py +57 -0
  121. accelforge/util/_parse_expressions.py +339 -0
  122. accelforge/util/_picklecache.py +32 -0
  123. accelforge/util/_setexpressions.py +268 -0
  124. accelforge/util/_sympy/__init__.py +0 -0
  125. accelforge/util/_sympy/broadcast_max.py +18 -0
  126. accelforge/util/_visualization.py +112 -0
  127. accelforge/util/_yaml.py +579 -0
  128. accelforge/util/parallel.py +193 -0
  129. accelforge-0.0.1.dist-info/METADATA +64 -0
  130. accelforge-0.0.1.dist-info/RECORD +258 -0
  131. accelforge-0.0.1.dist-info/WHEEL +5 -0
  132. accelforge-0.0.1.dist-info/licenses/LICENSE +19 -0
  133. accelforge-0.0.1.dist-info/top_level.txt +5 -0
  134. docs/_build/html/_sources/fastfusion.frontend.mapper.rst.txt +37 -0
  135. docs/_build/html/_sources/fastfusion.frontend.rst.txt +70 -0
  136. docs/_build/html/_sources/fastfusion.frontend.workload.rst.txt +21 -0
  137. docs/_build/html/_sources/fastfusion.mapper.FFM.rst.txt +37 -0
  138. docs/_build/html/_sources/fastfusion.mapper.rst.txt +18 -0
  139. docs/_build/html/_sources/fastfusion.rst.txt +20 -0
  140. docs/_build/html/_sources/fastfusion.util.rst.txt +21 -0
  141. docs/_build/html/_sources/index.rst.txt +87 -0
  142. docs/_build/html/_sources/modules.rst.txt +7 -0
  143. docs/_build/html/_sources/notes/citation.rst.txt +45 -0
  144. docs/_build/html/_sources/notes/definitions.rst.txt +43 -0
  145. docs/_build/html/_sources/notes/faqs.rst.txt +39 -0
  146. docs/_build/html/_sources/notes/modeling/accelerator_energy_latency.rst.txt +72 -0
  147. docs/_build/html/_sources/notes/modeling/component_energy_area.rst.txt +96 -0
  148. docs/_build/html/_sources/notes/modeling/mapping.rst.txt +100 -0
  149. docs/_build/html/_sources/notes/modeling.rst.txt +33 -0
  150. docs/_build/html/_sources/notes/parsing/arithmetic_parsing.rst.txt +136 -0
  151. docs/_build/html/_sources/notes/parsing/setexpressions.rst.txt +63 -0
  152. docs/_build/html/_sources/notes/parsing/yaml_parsing.rst.txt +176 -0
  153. docs/_build/html/_sources/notes/quickstart_and_installation.rst.txt +9 -0
  154. docs/_build/html/_sources/notes/spec/architecture.rst.txt +133 -0
  155. docs/_build/html/_sources/notes/spec/mapping.rst.txt +12 -0
  156. docs/_build/html/_sources/notes/spec/workload.rst.txt +83 -0
  157. docs/_build/html/_sources/notes/spec.rst.txt +36 -0
  158. docs/source/_ext/include_attrs.py +213 -0
  159. docs/source/_ext/include_docstring.py +364 -0
  160. docs/source/_ext/include_functions.py +154 -0
  161. docs/source/_ext/include_notebook.py +131 -0
  162. docs/source/_ext/include_yaml.py +119 -0
  163. docs/source/_ext/inherited_attributes.py +222 -0
  164. docs/source/_ext/paths.py +4 -0
  165. docs/source/conf.py +79 -0
  166. examples/arches/compute_in_memory/_include.yaml +74 -0
  167. examples/arches/compute_in_memory/_include_functions.py +229 -0
  168. examples/arches/compute_in_memory/_load_spec.py +57 -0
  169. examples/arches/compute_in_memory/components/c2c_multiplier.py +181 -0
  170. examples/arches/compute_in_memory/components/dac_c2c_r2r.py +605 -0
  171. examples/arches/compute_in_memory/components/misc.py +195 -0
  172. examples/arches/compute_in_memory/components/util/bit_functions.py +51 -0
  173. examples/arches/compute_in_memory/components/zero_comparator.py +92 -0
  174. examples/arches/compute_in_memory/isaac.yaml +233 -0
  175. examples/arches/compute_in_memory/memory_cells/ecram_demo.yaml +63 -0
  176. examples/arches/compute_in_memory/memory_cells/rram_example.yaml +63 -0
  177. examples/arches/compute_in_memory/memory_cells/rram_isaac_isca_2016.yaml +64 -0
  178. examples/arches/compute_in_memory/memory_cells/rram_neurosim_default.yaml +63 -0
  179. examples/arches/compute_in_memory/memory_cells/rram_raella_isca_2023.yaml +70 -0
  180. examples/arches/compute_in_memory/memory_cells/rram_wan_nature_2022.yaml +63 -0
  181. examples/arches/compute_in_memory/memory_cells/sram_colonnade_jssc_2021.yaml +63 -0
  182. examples/arches/compute_in_memory/memory_cells/sram_example.yaml +63 -0
  183. examples/arches/compute_in_memory/memory_cells/sram_jia_jssc_2020.yaml +63 -0
  184. examples/arches/compute_in_memory/memory_cells/sram_sinangil_jssc_2021.yaml +63 -0
  185. examples/arches/compute_in_memory/memory_cells/sram_wang_vlsi_2022.yaml +63 -0
  186. examples/arches/compute_in_memory/wang_vlsi_2022.yaml +289 -0
  187. examples/arches/eyeriss.yaml +68 -0
  188. examples/arches/fanout_variations/at_glb.yaml +31 -0
  189. examples/arches/fanout_variations/at_glb_with_fanout_node.yaml +34 -0
  190. examples/arches/fanout_variations/at_mac.yaml +31 -0
  191. examples/arches/fanout_variations/at_mac_with_constraints.yaml +38 -0
  192. examples/arches/fanout_variations/at_mac_with_fanout_node.yaml +34 -0
  193. examples/arches/nvdla.yaml +47 -0
  194. examples/arches/simple.yaml +28 -0
  195. examples/arches/tpu_v4i.yaml +67 -0
  196. examples/mappings/unfused_matmuls_to_simple.yaml +33 -0
  197. examples/misc/component_annotated.yaml +33 -0
  198. examples/workloads/gpt3_6.7B.yaml +124 -0
  199. examples/workloads/matmuls.yaml +20 -0
  200. examples/workloads/mobilenet_28.yaml +81 -0
  201. examples/workloads/mobilenet_various_separate.yaml +106 -0
  202. examples/workloads/three_matmuls_annotated.yaml +59 -0
  203. notebooks/.ipynb_checkpoints/fastfusion_arch_study_michael-checkpoint.ipynb +359 -0
  204. notebooks/compute_in_memory/_scripts.py +339 -0
  205. notebooks/compute_in_memory/isaac.guide.ipynb +270 -0
  206. notebooks/compute_in_memory/wang_vlsi_2022.ipynb +602 -0
  207. notebooks/paths.py +4 -0
  208. notebooks/tutorials/.ipynb_checkpoints/1_FFM-checkpoint.ipynb +3110 -0
  209. notebooks/tutorials/FFM.ipynb +3498 -0
  210. notebooks/tutorials/_include.py +48 -0
  211. notebooks/tutorials/component_energy_area.ipynb +363 -0
  212. tests/Q_mapping.yaml +38 -0
  213. tests/__init__.py +0 -0
  214. tests/conv.mapping.yaml +27 -0
  215. tests/conv.workload.yaml +13 -0
  216. tests/conv_sym.mapping.yaml +43 -0
  217. tests/copy.mapping.yaml +35 -0
  218. tests/copy.workload.yaml +15 -0
  219. tests/distribuffers/__init__.py +0 -0
  220. tests/distribuffers/multicast/test_cases.yaml +482 -0
  221. tests/distribuffers/spec/binding/valid_bindings.yaml +97 -0
  222. tests/distribuffers/spec/distributed.yaml +100 -0
  223. tests/distribuffers/spec/logical_arch.yaml +32 -0
  224. tests/distribuffers/spec/physical_arch.yaml +69 -0
  225. tests/distribuffers/test_binding.py +48 -0
  226. tests/frontend/__init__.py +0 -0
  227. tests/frontend/test_mapping_viz.py +52 -0
  228. tests/mapper/__init__.py +0 -0
  229. tests/mapper/configs/conv1d/conv1d.mapping.yaml +31 -0
  230. tests/mapper/configs/conv1d/conv1d.workload.yaml +11 -0
  231. tests/mapper/configs/two_conv1d/two_conv1d.expected.yaml +38 -0
  232. tests/mapper/configs/two_conv1d/two_conv1d.mapping.yaml +54 -0
  233. tests/mapper/configs/two_conv1d/two_conv1d.workload.yaml +19 -0
  234. tests/mapper/test_mapping_to_isl.py +90 -0
  235. tests/mapper/test_spatial_reuse_analysis.py +67 -0
  236. tests/mapper/test_temporal_reuse_analysis.py +56 -0
  237. tests/mapper/util.py +58 -0
  238. tests/matmul.mapping.yaml +29 -0
  239. tests/matmul.workload.yaml +12 -0
  240. tests/matmul_spatial.mapping.yaml +44 -0
  241. tests/mha.renames.yaml +65 -0
  242. tests/mha.workload.yaml +67 -0
  243. tests/mha.yaml +59 -0
  244. tests/mha_full.workload.yaml +67 -0
  245. tests/mobilenet.workload.yaml +35 -0
  246. tests/mobilenet_long.workload.yaml +64 -0
  247. tests/pmappingcache.py +24 -0
  248. tests/processing_stage.arch.yaml +40 -0
  249. tests/snowcat.arch.yaml +36 -0
  250. tests/test_ffm_join_pmappings.py +106 -0
  251. tests/test_ffm_make_pmappings.py +82 -0
  252. tests/test_ffm_make_tile_shapes.py +49 -0
  253. tests/test_mapper.py +100 -0
  254. tests/test_model.py +37 -0
  255. tests/test_plotting.py +72 -0
  256. tests/test_processing_stage.py +46 -0
  257. tests/test_symbolic_model.py +248 -0
  258. tests/test_workload.py +141 -0
notebooks/compute_in_memory/wang_vlsi_2022.ipynb ADDED
@@ -0,0 +1,602 @@
+ {
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "id": "7e2a43f3",
+ "metadata": {},
+ "source": [
+ "### Model of \"A 32.2 TOPS/W SRAM Compute-in-Memory Macro Employing a Linear 8-bit C-2C Ladder for Charge Domain Computation in 22nm for Edge Inference\", VLSI 2022\n",
+ "\n",
+ "Paper by Hechen Wang, Renzhi Liu, Richard Dorrance, Deepak Dasalukunte, Xiaosen\n",
+ "Liu, Dan Lake, Brent Carlton, and May Wu\n",
+ "\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "e465e48f",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "WARNING Loading configuration file from /home/tanner/.config/accelforge/config.yaml\n"
+ ]
+ },
+ {
+ "data": {
+ "text/markdown": [
+ "Some of the important variables for wang_vlsi_2022:\n",
+ "\n",
+ "- *array_wordlines*: array_parallel_inputs * cim_unit_width_cells rows in the array\n",
+ "- *array_bitlines*: array_parallel_outputs * cim_unit_depth_cells columns in the array\n",
+ "- *array_parallel_inputs*: get_array_fanout_reuse_output(spec) input slice(s) consumed in each cycle.\n",
+ "- *array_parallel_weights*: get_array_fanout_total(spec) weights slice(s) used for computation in each cycle.\n",
+ "- *array_parallel_outputs*: get_array_fanout_reuse_input(spec) partial sums produced in each cycle.\n",
+ "- *tech_node*: 2.2e-08 m\n",
+ "- *adc_resolution*: 8 bit(s)\n",
+ "- *dac_resolution*: max(voltage_dac_resolution, temporal_dac_resolution) bit(s)\n",
+ "- *n_adc_per_bank*: 16 ADC(s)\n",
+ "- *supported_input_bits*: 8 bit(s)\n",
+ "- *supported_output_bits*: 8 bit(s)\n",
+ "- *supported_weight_bits*: 8 bit(s)\n",
+ "- *bits_per_cell*: 1 bit(s)\n",
+ "- *cim_unit_width_cells*: supported_weight_bits adjacent cell(s) in a wordline store bit(s) in one weight slice and process one input & output slice together\n",
+ "- *cim_unit_depth_cells*: 8 adjacent cell(s) in a bitline operate in separate cycles\n",
+ "- *cell_config*: /home/tanner/research/fusion/accelforge/accelforge/examples/arches/compute_in_memory/memory_cells/sram_wang_vlsi_2022.yaml \n",
+ "- *cycle_period*: max(base_latency * no_faster_than_digital * voltage_latency_scale, limited_by_temporal_dac, 2e-9) second(s)"
+ ],
+ "text/plain": [
+ "<IPython.core.display.Markdown object>"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "WARNING Loading configuration file from /home/tanner/.config/accelforge/config.yaml\n"
+ ]
+ },
+ {
+ "data": {
+ "image/svg+xml": [
+ "<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"no\"?>\n",
+ "<!DOCTYPE svg PUBLIC \"-//W3C//DTD SVG 1.1//EN\"\n",
+ " \"http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd\">\n",
+ "<!-- Generated by graphviz version 2.43.0 (0)\n",
+ " -->\n",
+ "<!-- Title: G Pages: 1 -->\n",
+ "<svg width=\"132pt\" height=\"44pt\"\n",
+ " viewBox=\"0.00 0.00 132.00 44.00\" xmlns=\"http://www.w3.org/2000/svg\" xmlns:xlink=\"http://www.w3.org/1999/xlink\">\n",
+ "<g id=\"graph0\" class=\"graph\" transform=\"scale(1 1) rotate(0) translate(4 40)\">\n",
+ "<title>G</title>\n",
+ "<polygon fill=\"white\" stroke=\"transparent\" points=\"-4,4 -4,-40 128,-40 128,4 -4,4\"/>\n",
+ "<!-- root -->\n",
+ "<g id=\"node1\" class=\"node\">\n",
+ "<title>root</title>\n",
+ "<polygon fill=\"none\" stroke=\"black\" points=\"124,-36 0,-36 0,0 124,0 124,-36\"/>\n",
+ "<text text-anchor=\"middle\" x=\"62\" y=\"-14.9\" font-family=\"Arial\" font-size=\"12.00\">TODO: Arch Render</text>\n",
+ "</g>\n",
+ "</g>\n",
+ "</svg>\n"
+ ],
+ "text/plain": [
+ "Arch(nodes=ArchNodes([ProcessingStage(name='DAC', spatial=[], component_class='DualSidedR2RLadderDAC', component_model=None, component_modeling_log=[], actions=[TensorHolderAction(name='read', energy=None, energy_scale=1, latency=None, latency_scale=1, extra_attributes_for_component_model=ParseExtras(), bits_per_action='1 if bits_per_action is None else bits_per_action')], enabled=True, area=None, total_area=None, area_scale=1, leak_power=None, total_leak_power=None, leak_power_scale=1, energy_scale=1, total_latency='sum(*action2latency.values())', latency_scale=1, n_parallel_instances='array_parallel_inputs', extra_attributes_for_component_model=_ExtraAttrs(resolution='dac_resolution', unit_resistance='dac_unit_resistance', zero_between_values=0, bit_distribution='input_bit_distribution', hist='hist_to_magnitude(inputs_hist)'), tensors=Tensors(keep='input', may_keep='<Nothing if keep is defined, else All>', back='Nothing', tile_shape=[], no_refetch_from_above='~All', tensor_order_options=[], force_memory_hierarchy_order=True), bits_per_value_scale={'All': 1}, bits_per_action='input_bits / n_input_slices', direction='down'), ProcessingStage(name='RowDrivers', spatial=[], component_class='ArrayRowDrivers', component_model=None, component_modeling_log=[], actions=[TensorHolderAction(name='read', energy=None, energy_scale=1, latency=None, latency_scale=1, extra_attributes_for_component_model=ParseExtras(), bits_per_action='1 if bits_per_action is None else bits_per_action')], enabled=True, area=None, total_area=None, area_scale=1, leak_power=None, total_leak_power=None, leak_power_scale=1, energy_scale=1, total_latency='sum(*action2latency.values())', latency_scale=1, n_parallel_instances=1, extra_attributes_for_component_model=_ExtraAttrs(), tensors=Tensors(keep='input', may_keep='<Nothing if keep is defined, else All>', back='Nothing', tile_shape=[], no_refetch_from_above='~All', tensor_order_options=[], force_memory_hierarchy_order=True), bits_per_value_scale={'All': 1}, bits_per_action='input_bits / n_input_slices', direction='down'), ProcessingStage(name='WeightDrivers', spatial=[], component_class='ArrayRowDrivers', component_model=None, component_modeling_log=[], actions=[TensorHolderAction(name='read', energy=None, energy_scale=1, latency=None, latency_scale=1, extra_attributes_for_component_model=ParseExtras(), bits_per_action='1 if bits_per_action is None else bits_per_action')], enabled=True, area=None, total_area=None, area_scale=1, leak_power=None, total_leak_power=None, leak_power_scale=1, energy_scale=1, total_latency='sum(*action2latency.values())', latency_scale=1, n_parallel_instances=1, extra_attributes_for_component_model=_ExtraAttrs(rows='array_wordlines', cols='array_bitlines', cols_active_at_once='array_parallel_outputs', cell_config='cell_config', average_input_value='average_input_value', average_cell_value='average_weight_value', cycle_seconds='cycle_period', voltage='voltage', temporal_dac_bits='temporal_dac_resolution', sequential=True, read_pulse_width=0, resolution='adc_resolution', n_adcs='n_adc_per_bank', width='encoded_output_bits'), tensors=Tensors(keep='weight & Above', may_keep='<Nothing if keep is defined, else All>', back='Nothing', tile_shape=[], no_refetch_from_above='~All', tensor_order_options=[], force_memory_hierarchy_order=True), bits_per_value_scale={'All': 1}, bits_per_action='weight_bits / n_weight_slices', direction='down'), ProcessingStage(name='SelectWordlineDrivers', spatial=[], component_class='ArrayRowDrivers', component_model=None, 
component_modeling_log=[], actions=[TensorHolderAction(name='read', energy=None, energy_scale=1, latency=None, latency_scale=1, extra_attributes_for_component_model=ParseExtras(), bits_per_action='1 if bits_per_action is None else bits_per_action')], enabled=True, area=None, total_area=None, area_scale=1, leak_power=None, total_leak_power=None, leak_power_scale=1, energy_scale=1, total_latency='sum(*action2latency.values())', latency_scale=1, n_parallel_instances=1, extra_attributes_for_component_model=_ExtraAttrs(average_input_value=1), tensors=Tensors(keep='input', may_keep='<Nothing if keep is defined, else All>', back='Nothing', tile_shape=[], no_refetch_from_above='~All', tensor_order_options=[], force_memory_hierarchy_order=True), bits_per_value_scale={'All': 1}, bits_per_action='input_bits / n_input_slices', direction='down'), ProcessingStage(name='ADC', spatial=[], component_class='ADC', component_model=None, component_modeling_log=[], actions=[TensorHolderAction(name='read', energy=None, energy_scale=1, latency=None, latency_scale=1, extra_attributes_for_component_model=ParseExtras(), bits_per_action='1 if bits_per_action is None else bits_per_action')], enabled=True, area=None, total_area=None, area_scale='adc_area_scale', leak_power=None, total_leak_power=None, leak_power_scale=1, energy_scale='adc_energy_scale', total_latency='sum(*action2latency.values())', latency_scale=1, n_parallel_instances=1, extra_attributes_for_component_model=_ExtraAttrs(n_bits='adc_resolution', throughput_scale=1, throughput='1 / cycle_period * cols_active_at_once * throughput_scale'), tensors=Tensors(keep='output', may_keep='<Nothing if keep is defined, else All>', back='Nothing', tile_shape=[], no_refetch_from_above='~All', tensor_order_options=[], force_memory_hierarchy_order=True), bits_per_value_scale={'All': 1}, bits_per_action='output_bits / n_sliced_psums', direction='up'), ProcessingStage(name='ColumnDrivers', spatial=[], component_class='ArrayColumnDrivers', component_model=None, component_modeling_log=[], actions=[TensorHolderAction(name='read', energy=None, energy_scale=1, latency='cycle_period / cols_active_at_once', latency_scale=1, extra_attributes_for_component_model=ParseExtras(), bits_per_action='1 if bits_per_action is None else bits_per_action')], enabled=True, area=None, total_area=None, area_scale=1, leak_power=None, total_leak_power=None, leak_power_scale=1, energy_scale=1, total_latency='sum(*action2latency.values())', latency_scale=1, n_parallel_instances=1, extra_attributes_for_component_model=_ExtraAttrs(), tensors=Tensors(keep='output', may_keep='<Nothing if keep is defined, else All>', back='Nothing', tile_shape=[], no_refetch_from_above='~All', tensor_order_options=[], force_memory_hierarchy_order=True), bits_per_value_scale={'All': 1}, bits_per_action='output_bits / n_sliced_psums', direction='up'), Fanout(name='ColumnOfSubBanks', spatial=[Spatial(name='array_reuse_input', fanout=16, may_reuse='All', loop_bounds=[], min_usage=1, reuse='input', usage_scale='n_weight_slices', power_gateable=False)]), ProcessingStage(name='ColumnBandwidthLimiter', spatial=[], component_class='Dummy', component_model=None, component_modeling_log=[], actions=[TensorHolderAction(name='read', energy=None, energy_scale=1, latency='cycle_period', latency_scale=1, extra_attributes_for_component_model=ParseExtras(), bits_per_action='1 if bits_per_action is None else bits_per_action')], enabled=True, area=None, total_area=None, area_scale=1, leak_power=None, total_leak_power=None, 
leak_power_scale=1, energy_scale=1, total_latency='sum(*action2latency.values())', latency_scale=1, n_parallel_instances=1, extra_attributes_for_component_model=_ExtraAttrs(), tensors=Tensors(keep='(weight | output) & Above', may_keep='<Nothing if keep is defined, else All>', back='Nothing', tile_shape=[], no_refetch_from_above='~All', tensor_order_options=[], force_memory_hierarchy_order=True), bits_per_value_scale={'weight': 'n_weight_slices / weight_bits / 2', 'output': 'n_sliced_psums / output_bits', 'All - (weight | output)': 0}, bits_per_action=None, direction='down'), Fanout(name='SubBank', spatial=[Spatial(name='array_reuse_output', fanout=64, may_reuse='All', loop_bounds=[], min_usage=1, reuse='output', usage_scale=1, power_gateable=False)]), Memory(name='CimUnit', spatial=[], component_class='MemoryCell', component_model=None, component_modeling_log=[], actions=[TensorHolderAction(name='read', energy=None, energy_scale=1, latency='cycle_period', latency_scale=1, extra_attributes_for_component_model=ParseExtras(), bits_per_action='1 if bits_per_action is None else bits_per_action'), TensorHolderAction(name='write', energy=None, energy_scale=1, latency=None, latency_scale=1, extra_attributes_for_component_model=ParseExtras(), bits_per_action='1 if bits_per_action is None else bits_per_action')], enabled=True, area=None, total_area=None, area_scale=1, leak_power=None, total_leak_power=None, leak_power_scale=1, energy_scale=1, total_latency='sum(*action2latency.values())', latency_scale=1, n_parallel_instances='n_weight_slices', extra_attributes_for_component_model=_ExtraAttrs(n_instances='cim_unit_width_cells * cim_unit_depth_cells'), tensors=Tensors(keep='weight', may_keep='<Nothing if keep is defined, else All>', back='Nothing', tile_shape=[], no_refetch_from_above='weight', tensor_order_options=[], force_memory_hierarchy_order=False), bits_per_value_scale={'All': 1}, bits_per_action='weight.bits_per_value / n_weight_slices / n_input_slices', size='cim_unit_width_cells * cim_unit_depth_cells * bits_per_cell * n_weight_slices'), ProcessingStage(name='C2CMultiplier', spatial=[], component_class='C2CMultiplier', component_model=None, component_modeling_log=[], actions=[TensorHolderAction(name='read', energy=None, energy_scale=1, latency=None, latency_scale=1, extra_attributes_for_component_model=ParseExtras(), bits_per_action='1 if bits_per_action is None else bits_per_action')], enabled=True, area=None, total_area=None, area_scale=1, leak_power=None, total_leak_power=None, leak_power_scale=1, energy_scale=1, total_latency='sum(*action2latency.values())', latency_scale=1, n_parallel_instances=1, extra_attributes_for_component_model=_ExtraAttrs(resolution='cim_unit_width_cells', a_hist='inputs_hist', b_bit_distribution='weight_bit_distribution', unit_capacitance=2e-15), tensors=Tensors(keep='input', may_keep='<Nothing if keep is defined, else All>', back='Nothing', tile_shape=[], no_refetch_from_above='~All', tensor_order_options=[], force_memory_hierarchy_order=True), bits_per_value_scale={'All': 1}, bits_per_action='input.bits_per_value / n_weight_slices / n_input_slices', direction='down'), ProcessingStage(name='C2CMultiplierPortB', spatial=[], component_class='C2CMultiplierPortB', component_model=None, component_modeling_log=[], actions=[TensorHolderAction(name='read', energy=None, energy_scale=1, latency=None, latency_scale=1, extra_attributes_for_component_model=ParseExtras(), bits_per_action='1 if bits_per_action is None else bits_per_action')], enabled=True, area=None, 
total_area=None, area_scale=1, leak_power=None, total_leak_power=None, leak_power_scale=1, energy_scale=1, total_latency='sum(*action2latency.values())', latency_scale=1, n_parallel_instances=1, extra_attributes_for_component_model=_ExtraAttrs(resolution='cim_unit_width_cells', a_hist='inputs_hist', b_bit_distribution='weight_bit_distribution', unit_capacitance=2e-15), tensors=Tensors(keep='weight', may_keep='<Nothing if keep is defined, else All>', back='Nothing', tile_shape=[], no_refetch_from_above='~All', tensor_order_options=[], force_memory_hierarchy_order=True), bits_per_value_scale={'All': 1}, bits_per_action='weight.bits_per_value / n_sliced_psums', direction='down'), Compute(name='FreeCompute', spatial=[], component_class='Dummy', component_model=None, component_modeling_log=[], actions=[Action(name='compute', energy=None, energy_scale=1, latency=None, latency_scale=1, extra_attributes_for_component_model=ParseExtras())], enabled='len(All) == 3', area=None, total_area=None, area_scale=1, leak_power=None, total_leak_power=None, leak_power_scale=1, energy_scale=1, total_latency='sum(*action2latency.values())', latency_scale=1, n_parallel_instances=1, extra_attributes_for_component_model=_ExtraAttrs())]), arch_globals_dependent_on_workload=ParseExtras(encoded_input_bits='input_bits', encoded_weight_bits='weight_bits', encoded_output_bits='output_bits', input_encoding_func='offset_encode_hist', weight_encoding_func='offset_encode_hist', signed_sum_across_inputs=True, signed_sum_across_weights=False, cim_unit_width_cells='supported_weight_bits', cim_unit_depth_cells=8, bits_per_cell=1, adc_resolution=8, voltage_dac_resolution=8, temporal_dac_resolution=1, dac_unit_resistance=5000, n_adc_per_bank=16, base_latency=6.4e-09, latency_columns_scale='dac_unit_resistance / 5000 * array_bitlines / 128', latency_dac_resolution_scale='voltage_dac_resolution / 8', no_faster_than_digital='max(0.5, latency_columns_scale * latency_dac_resolution_scale)', limited_by_temporal_dac='0.05e-9 * (2 ** temporal_dac_resolution - 1)', cycle_period='max(base_latency * no_faster_than_digital * voltage_latency_scale, limited_by_temporal_dac, 2e-9)', read_pulse_width='cycle_period', weight_bits='weight.bits_per_value', input_bits='input.bits_per_value', output_bits='output.bits_per_value', array_parallel_inputs='get_array_fanout_reuse_output(spec)', array_parallel_outputs='get_array_fanout_reuse_input(spec)', array_parallel_weights='get_array_fanout_total(spec)', array_wordlines='array_parallel_inputs * cim_unit_width_cells', array_bitlines='array_parallel_outputs * cim_unit_depth_cells', dac_resolution='max(voltage_dac_resolution, temporal_dac_resolution)', cols_active_at_once='array_parallel_outputs', in_b='encoded_input_bits', w_b='encoded_weight_bits', max_input_bits_per_slice='min(dac_resolution, in_b)', max_weight_bits_per_slice='min(cim_unit_width_cells * bits_per_cell, w_b)', average_input_bits_per_slice='encoded_input_bits / n_input_slices', average_weight_bits_per_slice='encoded_weight_bits / n_weight_slices', n_virtual_macs='max_input_bits_per_slice * max_weight_bits_per_slice * encoded_output_bits', ehtas='encoded_hist_to_avg_slice', in_enc_fn='input_encoding_func', w_enc_fn='weight_encoding_func', average_input_value='ehtas(in_enc_fn(inputs_hist), in_b, max_input_bits_per_slice)', average_weight_value='ehtas(w_enc_fn(weights_hist), w_b, max_weight_bits_per_slice)', input_bit_distribution='ehtas(in_enc_fn(inputs_hist), in_b, 1, return_per_slice=True)', 
weight_bit_distribution='ehtas(w_enc_fn(weights_hist), w_b, 1, return_per_slice=True)', min_weight_slices='ceil(min_supported_weight_bits / bits_per_cell / cim_unit_width_cells)', min_input_slices='ceil(min_supported_input_bits / max_input_bits_per_slice)', n_input_slices='max(ceil(in_b / max_input_bits_per_slice), min_input_slices)', n_weight_slices='max(ceil(w_b / max_weight_bits_per_slice), min_weight_slices)', n_sliced_psums='n_input_slices * n_weight_slices'), extra_attributes_for_all_component_models=ParseExtras(tech_node='tech_node', cycle_period='cycle_period', rows='array_wordlines', cols='array_bitlines', cols_active_at_once='array_parallel_outputs', cell_config='cell_config', average_input_value='average_input_value', average_cell_value='average_weight_value', voltage='voltage', temporal_dac_bits='temporal_dac_resolution', read_pulse_width='read_pulse_width', resolution='adc_resolution', n_adcs='n_adc_per_bank', width='encoded_output_bits'))"
+ ]
+ },
+ "execution_count": 1,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "from _scripts import (\n",
+ " display_important_variables,\n",
+ " get_spec,\n",
+ " bar_comparison,\n",
+ " bar_stacked,\n",
+ " bar,\n",
+ ")\n",
+ "display_important_variables('wang_vlsi_2022')\n",
+ "get_spec('wang_vlsi_2022').arch"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "a652cef4",
+ "metadata": {},
+ "source": [
+ "#### Description of The Macro\n",
+ "\n",
+ "The macro uses a 512-row by 128-column SRAM array. Every eight adjacent columns\n",
+ "store 8b weight slices and together compute one 8b MAC operation. An 8b voltage\n",
+ "DAC provides 8b inputs in a single cycle. An analog-digital C-2C multiplier\n",
+ "computes a MAC operation between an 8b analog input and an 8b digital weight.\n",
+ "These multipliers allow the macro to compute 8b MACs in a single cycle and read\n",
+ "the results with one ADC convert. Furthermore, they allow the macro to avoid\n",
+ "connecting memory cells to analog circuits.\n",
+ "\n",
+ "Every eight rows in the array share a C-2C multiplier. These rows are activated\n",
+ "in separate cycles, so it requires eight cycles to activate all rows in the\n",
+ "array.\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "e654dbb4",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import accelforge as af\n",
+ "import matplotlib.pyplot as plt"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "6b448caf",
+ "metadata": {},
+ "source": [
+ "#### Area Breakdown\n",
+ "\n",
+ "This test replicates the results of Fig. 22(b) of the paper.\n",
+ "\n",
+ "We show the area breakdown of the macro. The area is broken down into the\n",
+ "following components:\n",
+ "\n",
+ "- ADC: Area consumed by the ADC\n",
+ "- DAC: Area consumed by the DAC\n",
+ "- MAC: Area consumed by the MAC, including the row drivers, select wordline\n",
+ " drivers, CiM unit, and C-2C multiplier.\n",
+ "- Misc: Area consumed by the weight drivers and control circuitry.\n",
+ "\n",
+ "Modeled miscellaneous area is lower than reference because we do not model\n",
159
+ "the control circuitry in the weight drivers.\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "2f7ce1a4",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "parsed = get_spec('wang_vlsi_2022').calculate_component_area_energy_latency_leak()\n",
+ "\n",
+ "area = parsed.arch.per_component_total_area\n",
+ "\n",
+ "TOTAL_AREA = 0.124e-6\n",
+ "\n",
+ "expected_area = {\n",
+ " \"ADC\": 0.13 * TOTAL_AREA,\n",
+ " \"DAC\": 0.3 * TOTAL_AREA,\n",
+ " \"MAC\": 0.46 * TOTAL_AREA,\n",
+ " \"Misc\": 0.11 * TOTAL_AREA,\n",
+ "}\n",
+ "modeled = {}\n",
+ "modeled[\"ADC\"] = area[\"ADC\"] + area[\"ColumnDrivers\"] + area[\"ColumnBandwidthLimiter\"]\n",
+ "modeled[\"DAC\"] = area[\"DAC\"]\n",
+ "modeled[\"MAC\"] = area[\"CimUnit\"] + area[\"C2CMultiplier\"] + area[\"C2CMultiplierPortB\"] + area[\"RowDrivers\"] + area[\"SelectWordlineDrivers\"]\n",
+ "modeled[\"Misc\"] = sum(area.values()) - sum(modeled.values()) + area[\"WeightDrivers\"]\n",
+ "total_area = sum(modeled.values())\n",
+ "\n",
+ "fig, ax = plt.subplots(1, 1, figsize=(10, 5))\n",
+ "bar_comparison(\n",
+ " {\n",
+ " \"Modeled\": modeled,\n",
+ " \"Expected\": expected_area,\n",
+ " },\n",
+ " \"Component\",\n",
+ " \"Area (mm^2)\",\n",
+ " \"Area Breakdown: Modeled vs Expected\",\n",
+ " ax,\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "e8a29514",
+ "metadata": {},
+ "source": [
+ "#### Energy Breakdown\n",
+ "\n",
+ "This test replicates the results of Fig. 22(a) of the paper. We show the energy\n",
+ "breakdown of the macro. The energy is broken down into the following components:\n",
+ "\n",
+ "- ADC: Energy consumed by the ADC\n",
+ "- DAC: Energy consumed by the DAC\n",
+ "- MAC: Energy consumed by the MAC, including the row drivers, select wordline drivers,\n",
+ " CiM unit, and C-2C multiplier.\n",
+ "- Misc: The weight drivers are miscellaneous components in our model, but they consume\n",
+ " no energy in this weight-stationary test. Misc also includes control circuitry in the\n",
+ " reference.\n",
+ " \n",
+ "Modeled miscellaneous energy is lower than reference because we do not model the control\n",
220
+ "circuitry."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "47c5a41b",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "spec = get_spec('wang_vlsi_2022', add_dummy_main_memory=True)\n",
+ "spec.mapper.ffm.metrics = af.mapper.FFM.Metrics.ENERGY\n",
+ "results = af.mapper.FFM.map_workload_to_arch(spec)\n",
+ "energy = results.energy(per_component=True)\n",
+ "\n",
+ "TOPS_PER_WATT = 16.37572276\n",
+ "PJ_PER_MVM = 2 / TOPS_PER_WATT * 16 * 64# * 8\n",
+ "\n",
+ "expected_energy = {\n",
+ " \"ADC\": PJ_PER_MVM * 0.34 * 1e-12,\n",
+ " \"DAC\": PJ_PER_MVM * 0.22 * 1e-12,\n",
+ " \"MAC\": PJ_PER_MVM * 0.4 * 1e-12,\n",
+ " \"Misc\": PJ_PER_MVM * 0.04 * 1e-12,\n",
+ "}\n",
+ "\n",
+ "modeled = {}\n",
+ "modeled[\"ADC\"] = energy[\"ADC\"] + energy[\"ColumnDrivers\"] + energy[\"ColumnBandwidthLimiter\"]\n",
+ "modeled[\"DAC\"] = energy[\"DAC\"]\n",
+ "modeled[\"MAC\"] = energy[\"CimUnit\"] + energy[\"C2CMultiplier\"] + energy[\"C2CMultiplierPortB\"] + energy[\"RowDrivers\"] + energy[\"SelectWordlineDrivers\"]\n",
+ "modeled[\"Misc\"] = sum(energy.values()) - sum(modeled.values()) + energy[\"WeightDrivers\"]\n",
+ "\n",
+ "fig, ax = plt.subplots(1, 1, figsize=(10, 5))\n",
+ "bar_comparison(\n",
+ " {\n",
+ " \"Modeled\": modeled,\n",
+ " \"Expected\": expected_energy,\n",
+ " },\n",
+ " \"Component\",\n",
+ " \"Energy (J)\",\n",
+ " \"Energy Breakdown: Modeled vs Expected\",\n",
+ " ax,\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "90204682",
+ "metadata": {},
+ "source": [
+ "#### Energy Efficiency, Throughput, and Compute Density\n",
+ "\n",
+ "This test replicates the results of Table III in the paper.\n",
+ "\n",
+ "In this test, we show the energy efficiency, throughput, and compute density\n",
+ "of the macro at 0.7V and 1.1V supply voltages.\n",
+ "\n",
+ "We see that increasing the supply voltage increases throughput at the cost of\n",
+ "lower energy efficiency."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "8df3fd5d",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "spec = get_spec('wang_vlsi_2022', add_dummy_main_memory=True)\n",
+ "spec.mapper.ffm.metrics = af.mapper.Metrics.ENERGY\n",
+ "\n",
+ "spec_parsed = spec.calculate_component_area_energy_latency_leak()\n",
+ "spec.variables.voltage = 0.7\n",
+ "results_a = af.mapper.FFM.map_workload_to_arch(spec)\n",
+ "spec.variables.voltage = 1.1\n",
+ "results_b = af.mapper.FFM.map_workload_to_arch(spec)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "6ef3fe67",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "per_compute_latency_a = results_a.per_compute().to_dict()[\"Total<SEP>latency\"]\n",
+ "per_compute_latency_b = results_b.per_compute().to_dict()[\"Total<SEP>latency\"]\n",
+ "\n",
+ "tops_a = 2 / results_a.per_compute().to_dict()[\"Total<SEP>latency\"] / 1e12\n",
308
+ "tops_b = 2 / results_b.per_compute().to_dict()[\"Total<SEP>latency\"] / 1e12\n",
+ "\n",
+ "tops_per_mm_a = tops_a / total_area / 1e6\n",
+ "tops_per_mm_b = tops_b / total_area / 1e6\n",
+ "\n",
+ "tops_per_w_a = 2 / results_a.per_compute().energy() / 1e12\n",
+ "tops_per_w_b = 2 / results_b.per_compute().energy() / 1e12\n",
+ "\n",
+ "# Structure the dictionaries flat from the start\n",
+ "modeled = {\n",
+ " \"0.7V tops_per_mm2\": tops_per_mm_a,\n",
+ " \"0.7V tops_per_w\": tops_per_w_a,\n",
+ " \"0.7V tops\": tops_a,\n",
+ " \"1.1V tops_per_mm2\": tops_per_mm_b,\n",
+ " \"1.1V tops_per_w\": tops_per_w_b,\n",
+ " \"1.1V tops\": tops_b,\n",
+ "}\n",
+ "\n",
+ "expected = {\n",
+ " \"0.7V tops_per_mm2\": 2.4,\n",
+ " \"0.7V tops_per_w\": 32.2,\n",
+ " \"0.7V tops\": 0.3,\n",
+ " \"1.1V tops_per_mm2\": 4.0,\n",
+ " \"1.1V tops_per_w\": 15.5,\n",
+ " \"1.1V tops\": 0.5,\n",
+ "}\n",
+ "\n",
+ "fig, ax = plt.subplots(1, 1, figsize=(15, 6))\n",
+ "ax.set_yscale('log')\n",
+ "bar_comparison(\n",
+ " {\n",
+ " \"Modeled\": modeled,\n",
+ " \"Expected\": expected,\n",
+ " },\n",
+ " \"Metric\",\n",
+ " \"Value\",\n",
+ " \"Energy Efficiency, Throughput, and Compute Density\",\n",
+ " ax,\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "c92021a2",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "\n",
+ "\n",
+ "\n",
+ "\n",
+ "\n",
+ "\n",
+ "# # def test_voltage_scaling():\n",
+ "# # \"\"\"\n",
+ "# # ### Voltage Scaling\n",
+ "\n",
+ "# # This test replicates the results of Fig. 23 of the paper.\n",
+ "\n",
+ "# # We show the effects of voltage scaling on the energy efficiency and\n",
+ "# # throughput of the macro, testing supply voltages of 0.7V, 0.8V, 0.9V, 1V,\n",
+ "# # and 1.1V.\n",
+ "\n",
+ "# # We can see that increasing the supply voltage increases throughput and\n",
+ "# # compute density at the cost of lower energy efficiency.\n",
+ "\n",
+ "# # Modeled and reference compute density varies because we did not model the\n",
+ "# # area of some miscellaneous components, leading to the model having a smaller\n",
+ "# # area and higher compute density. This could be corrected by adding\n",
+ "# # additional components to the model. We also use a different scaling factor\n",
+ "# # for voltage versus energy, leading to a different curve shape. This could be\n",
+ "# # corrected by adjusting the VOLTAGE_ENERGY_SCALE formula and propagating the\n",
+ "# # value to each subcomponent model.\n",
+ "# # \"\"\"\n",
+ "# # results = utl.parallel_test(\n",
+ "# # utl.delayed(utl.quick_run)(macro=MACRO_NAME, variables=dict(VOLTAGE=x))\n",
+ "# # for x in [0.7, 0.8, 0.9, 1, 1.1]\n",
+ "# # )\n",
+ "\n",
+ "# # for r, tops_mm, tops_w in zip(\n",
+ "# # results,\n",
+ "# # [2.377, 2.858, 3.200, 3.596, 3.941],\n",
+ "# # [31.998, 22.590, 18.439, 16.376, 15.467],\n",
+ "# # ):\n",
+ "# # r.add_compare_ref(\"tops_per_mm2\", tops_mm)\n",
+ "# # r.add_compare_ref(\"tops_per_w\", tops_w)\n",
+ "# # r.add_compare_ref(\"tops\", tops_mm * TOTAL_AREA / 1e6)\n",
+ "# # return results\n",
+ "\n",
+ "\n",
+ "# # def test_tops():\n",
+ "# # \"\"\"\n",
+ "# # ### Energy Efficiency, Throughput, and Compute Density\n",
+ "\n",
+ "# # This test replicates the results of Table III in the paper.\n",
+ "\n",
+ "# # In this test, we show the energy efficiency, throughput, and compute density\n",
+ "# # of the macro at 0.7V and 1.1V supply voltages.\n",
+ "\n",
+ "# # We see that increasing the supply voltage increases throughput at the cost of\n",
+ "# # lower energy efficiency.\n",
+ "# # \"\"\"\n",
+ "# # results = utl.parallel_test(\n",
+ "# # utl.delayed(utl.quick_run)(macro=MACRO_NAME, variables=dict(VOLTAGE=x))\n",
+ "# # for x in [0.7, 1.1]\n",
+ "# # )\n",
+ "# # for r, tops_mm, tops_w, tops in zip(\n",
+ "# # results,\n",
+ "# # [2.4, 4.0],\n",
+ "# # [32.2, 15.5],\n",
+ "# # [0.3, 0.5],\n",
+ "# # ):\n",
+ "# # r.add_compare_ref(\"tops_per_mm2\", tops_mm)\n",
+ "# # r.add_compare_ref(\"tops_per_w\", tops_w)\n",
+ "# # r.add_compare_ref(\"tops\", tops)\n",
+ "# # return results\n",
+ "\n",
+ "\n",
+ "# # def test_full_system_dnn(dnn_name: str, batch_size: int = None):\n",
+ "# # \"\"\"\n",
+ "# # ### Exploration of Full-System Energy Efficiency\n",
+ "\n",
+ "# # In this test, we look at the full-system energy breakdown when running DNNs\n",
+ "# # on a CiM accelerator. We place the macro in a chip with local input/output\n",
+ "# # buffers, routers for on-chip data movement, a global buffer, and DRAM. We\n",
+ "# # show the area and energy spent on DRAM, the global buffer, and other\n",
+ "# # components.\n",
+ "\n",
+ "# # We compare three scenarios:\n",
+ "\n",
+ "# # 1. Inputs, outputs, and weights stored off-chip in DRAM and fetched for each\n",
+ "# # layer\n",
+ "# # 2. Inputs and outputs fetched from DRAM, weights stationary (pre-loaded for\n",
+ "# # each layer)\n",
+ "# # 3. Weights stationary, layers fused to keep inputs/outputs on-chip in the\n",
+ "# # global\n",
+ "\n",
+ "# # We can see that weight-stationary processing significantly reduces overall\n",
+ "# # energy due to fewer weight fetches from off-chip. Benefits are limited,\n",
+ "# # however, because inputs and outputs still must be fetched from off-chip. To\n",
+ "# # see further benefits, fusing layers is necessary to keep data on-chip\n",
+ "# # between DNN layers. We note that weight-stationary CiM requires sufficient\n",
+ "# # memory to keep all DNN weights on-chip. To store large DNNs, this may\n",
+ "# # require a multi-chip pipeline or dense storage technologies.\n",
+ "# # \"\"\"\n",
+ "\n",
+ "# # dnn_dir = utl.path_from_model_dir(f\"workloads/{dnn_name}\")\n",
+ "# # layer_paths = [\n",
+ "# # os.path.join(dnn_dir, l) for l in os.listdir(dnn_dir) if l.endswith(\".yaml\")\n",
+ "# # ]\n",
+ "\n",
+ "# # layer_paths = [l for l in layer_paths if \"From einsum\" not in open(l, \"r\").read()]\n",
+ "\n",
+ "# # if \"gpt2_medium\" in dnn_name:\n",
+ "# # layer_paths = layer_paths[:-1]\n",
+ "\n",
+ "# # def callfunc(spec):\n",
+ "# # spec.architecture.find(\"shared_router_group\").spatial.meshX = 64\n",
+ "# # spec.architecture.find(\"shared_router_group\").attributes.has_power_gating = True\n",
+ "# # spec.architecture.find(\"shared_router_group\").constraints.spatial.no_reuse = []\n",
+ "\n",
+ "# # spec.architecture.find(\"tile_in_chip\").spatial.meshX = 16\n",
+ "# # spec.architecture.find(\"tile_in_chip\").attributes.has_power_gating = True\n",
+ "# # spec.architecture.find(\"tile_in_chip\").constraints.spatial.no_reuse = []\n",
+ "\n",
+ "# # if batch_size is not None:\n",
+ "# # spec.problem.instance[\"N\"] = batch_size\n",
+ "# # spec.architecture.find(\"output_buffer\").constraints.temporal.iter_only = []\n",
+ "\n",
+ "# # results = utl.parallel_test(\n",
+ "# # utl.delayed(utl.run_layer)(\n",
+ "# # macro=MACRO_NAME,\n",
+ "# # layer=l,\n",
+ "# # variables=dict(EXPERIMENT_NAME=s),\n",
+ "# # tile=\"input_output_bufs\",\n",
+ "# # chip=\"large_router_glb\",\n",
+ "# # system=system,\n",
+ "# # callfunc=callfunc,\n",
+ "# # )\n",
+ "# # for l in layer_paths\n",
+ "# # for s, system in (\n",
+ "# # ('\"All Tensors Off-Chip\"', \"fetch_all_lpddr4\"),\n",
+ "# # ('\"Weight-Stationary\"', \"fetch_weights_lpddr4\"),\n",
+ "# # ('\"Weight-Stationary + Fusion\"', None),\n",
+ "# # )\n",
+ "# # )\n",
+ "\n",
+ "# # for r in results:\n",
+ "# # r.per_component_energy.setdefault(\"main_memory\", 0)\n",
+ "\n",
+ "# # results.combine_per_component_energy(\n",
+ "# # [\n",
+ "# # \"c2c_multiplier_analog_port\",\n",
+ "# # \"c2c_multiplier_digital_port\",\n",
+ "# # \"cim_unit\",\n",
+ "# # \"adc\",\n",
+ "# # \"select_wordline_drivers\",\n",
+ "# # \"row_drivers\",\n",
+ "# # \"dac\",\n",
+ "# # \"output_buffer\",\n",
+ "# # \"input_buffer\",\n",
+ "# # \"router\",\n",
+ "# # \"weight_drivers\",\n",
+ "# # \"column_drivers\",\n",
+ "# # ],\n",
+ "# # \"Macro & Other On-Chip Data Movement\",\n",
+ "# # )\n",
+ "# # results.combine_per_component_energy([\"glb\"], \"Global Buffer\")\n",
+ "# # results.combine_per_component_energy([\"main_memory\"], \"Off-Chip DRAM\")\n",
+ "# # results.clear_zero_energies()\n",
+ "\n",
+ "# # return results\n",
+ "\n",
+ "\n",
+ "# if __name__ == \"__main__\":\n",
+ "# test_energy_breakdown(),\n",
+ "# test_area_breakdown(),\n",
+ "# test_voltage_scaling(),\n",
+ "# test_tops(),\n",
+ "# test_full_system_dnn(\"resnet18\")\n",
+ "\n",
+ "\n",
+ "\n",
+ "# def test_tops():\n",
+ "# \"\"\"\n",
+ "# \"\"\"\n",
+ "# results = utl.parallel_test(\n",
+ "# utl.delayed(utl.quick_run)(macro=MACRO_NAME, variables=dict(VOLTAGE=x))\n",
+ "# for x in [0.7, 1.1]\n",
+ "# )\n",
+ "# for r, tops_mm, tops_w, tops in zip(\n",
+ "# results,\n",
+ "# [2.4, 4.0],\n",
+ "# [32.2, 15.5],\n",
+ "# [0.3, 0.5],\n",
+ "# ):\n",
+ "# r.add_compare_ref(\"tops_per_mm2\", tops_mm)\n",
+ "# r.add_compare_ref(\"tops_per_w\", tops_w)\n",
+ "# r.add_compare_ref(\"tops\", tops)\n",
+ "# return results\n",
+ "\n",
+ "\n",
+ "\n",
+ "\n",
+ "\n",
+ "# result = run_test(\"wang_vlsi_2022\", \"test_tops\")\n",
+ "\n",
+ "# fig, ax = plt.subplots(1, 3, figsize=(15, 5))\n",
+ "# bar_side_by_side(\n",
+ "# {r.variables[\"VOLTAGE\"]: r.tops for r in result},\n",
+ "# xlabel=\"Voltage (V)\",\n",
+ "# ylabel=\"Throughput (TOPS)\",\n",
+ "# title=\"Voltage vs. Throughput\",\n",
+ "# ax=ax[0],\n",
+ "# )\n",
+ "# bar_side_by_side(\n",
+ "# {r.variables[\"VOLTAGE\"]: r.tops_per_w for r in result},\n",
+ "# xlabel=\"Voltage (V)\",\n",
+ "# ylabel=\"Energy Efficiency (TOPS/W)\",\n",
+ "# title=\"Voltage vs. Energy Efficiency\",\n",
+ "# ax=ax[1],\n",
+ "# )\n",
+ "# bar_side_by_side(\n",
+ "# {r.variables[\"VOLTAGE\"]: r.tops_per_mm2 for r in result},\n",
+ "# xlabel=\"Voltage (V)\",\n",
+ "# ylabel=\"Compute Density (TOPS/mm^2)\",\n",
+ "# title=\"Voltage vs. Compute Density\",\n",
+ "# ax=ax[2],\n",
+ "# )"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.12.11"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+ }
notebooks/paths.py ADDED
@@ -0,0 +1,4 @@
+ import os
+ from pathlib import Path
+
+ EXAMPLES_DIR = Path(os.path.abspath("../../examples"))