accelforge 0.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (258) hide show
  1. accelforge/__init__.py +21 -0
  2. accelforge/_accelerated_imports.py +16 -0
  3. accelforge/_deprecate/_simanneal/evalmapping.py +271 -0
  4. accelforge/_deprecate/_simanneal/mapspaceglobals.py +298 -0
  5. accelforge/_deprecate/_simanneal/simanneal.py +666 -0
  6. accelforge/_deprecate/_simanneal/tracking.py +105 -0
  7. accelforge/_deprecate/_simanneal/wrappers.py +218 -0
  8. accelforge/_deprecate/_simanneal2/__init__.py +7 -0
  9. accelforge/_deprecate/_simanneal2/simanneal.py +493 -0
  10. accelforge/_deprecate/_simanneal2/tracking.py +116 -0
  11. accelforge/_deprecate/compatibility_util.py +181 -0
  12. accelforge/_deprecate/layerdeduplication/__init__.py +2 -0
  13. accelforge/_deprecate/layerdeduplication/group_similar_einsums.py +160 -0
  14. accelforge/_deprecate/layerdeduplication/grouped_einsums.py +84 -0
  15. accelforge/_deprecate/mapping_filter_tags/__init__.py +2 -0
  16. accelforge/_deprecate/mapping_filter_tags/ffmt.py +212 -0
  17. accelforge/_deprecate/mapping_filter_tags/onesplit.py +24 -0
  18. accelforge/_deprecate/mapping_filter_tags/util.py +24 -0
  19. accelforge/_deprecate/tags.py +69 -0
  20. accelforge/_deprecate/viz/__init__.py +0 -0
  21. accelforge/_deprecate/viz/interactive.py +159 -0
  22. accelforge/_deprecate/viz/reservationtree.py +307 -0
  23. accelforge/_deprecate/viz/ski_slope.py +88 -0
  24. accelforge/_version.py +15 -0
  25. accelforge/examples.py +39 -0
  26. accelforge/frontend/__init__.py +10 -0
  27. accelforge/frontend/_binding.py +129 -0
  28. accelforge/frontend/_workload_isl/__init__.py +2 -0
  29. accelforge/frontend/_workload_isl/_isl.py +149 -0
  30. accelforge/frontend/_workload_isl/_symbolic.py +141 -0
  31. accelforge/frontend/arch copy.py +1544 -0
  32. accelforge/frontend/arch.py +1642 -0
  33. accelforge/frontend/config.py +63 -0
  34. accelforge/frontend/mapper/__init__.py +5 -0
  35. accelforge/frontend/mapper/ffm.py +126 -0
  36. accelforge/frontend/mapper/mapper.py +7 -0
  37. accelforge/frontend/mapper/metrics.py +30 -0
  38. accelforge/frontend/mapping/__init__.py +1 -0
  39. accelforge/frontend/mapping/mapping.py +1736 -0
  40. accelforge/frontend/model.py +14 -0
  41. accelforge/frontend/renames.py +150 -0
  42. accelforge/frontend/spec copy.py +230 -0
  43. accelforge/frontend/spec.py +301 -0
  44. accelforge/frontend/variables.py +12 -0
  45. accelforge/frontend/workload.py +952 -0
  46. accelforge/mapper/FFM/__init__.py +9 -0
  47. accelforge/mapper/FFM/_join_pmappings/__init__.py +0 -0
  48. accelforge/mapper/FFM/_join_pmappings/compatibility.py +653 -0
  49. accelforge/mapper/FFM/_join_pmappings/compress_pmappings.py +140 -0
  50. accelforge/mapper/FFM/_join_pmappings/join_pmappings.py +703 -0
  51. accelforge/mapper/FFM/_join_pmappings/pmapping_dataframe.py +901 -0
  52. accelforge/mapper/FFM/_join_pmappings/pmapping_group.py +337 -0
  53. accelforge/mapper/FFM/_make_pmappings/contraints/__init__.py +0 -0
  54. accelforge/mapper/FFM/_make_pmappings/contraints/constraints.py +360 -0
  55. accelforge/mapper/FFM/_make_pmappings/make_pmapping_templates/__init__.py +1 -0
  56. accelforge/mapper/FFM/_make_pmappings/make_pmapping_templates/make_loops.py +373 -0
  57. accelforge/mapper/FFM/_make_pmappings/make_pmapping_templates/make_pmapping_templates.py +463 -0
  58. accelforge/mapper/FFM/_make_pmappings/make_pmapping_templates/make_reservations.py +95 -0
  59. accelforge/mapper/FFM/_make_pmappings/make_pmapping_templates/make_storage_order.py +382 -0
  60. accelforge/mapper/FFM/_make_pmappings/make_pmapping_templates/make_storages.py +155 -0
  61. accelforge/mapper/FFM/_make_pmappings/make_pmappings.py +411 -0
  62. accelforge/mapper/FFM/_make_pmappings/make_pmappings_from_templates/__init__.py +1 -0
  63. accelforge/mapper/FFM/_make_pmappings/make_pmappings_from_templates/make_pmappings_from_templates.py +407 -0
  64. accelforge/mapper/FFM/_make_pmappings/make_pmappings_from_templates/make_tile_shapes.py +1681 -0
  65. accelforge/mapper/FFM/_make_pmappings/make_pmappings_from_templates/run_model.py +170 -0
  66. accelforge/mapper/FFM/_make_pmappings/make_pmappings_from_templates/symbol_relations.py +174 -0
  67. accelforge/mapper/FFM/_make_pmappings/pmapper_job.py +282 -0
  68. accelforge/mapper/FFM/_pareto_df/df_convention.py +273 -0
  69. accelforge/mapper/FFM/_pareto_df/pareto copy.py +836 -0
  70. accelforge/mapper/FFM/_pareto_df/pareto.py +508 -0
  71. accelforge/mapper/FFM/data.py +61 -0
  72. accelforge/mapper/FFM/main copy.py +236 -0
  73. accelforge/mapper/FFM/main.py +208 -0
  74. accelforge/mapper/FFM/mappings.py +510 -0
  75. accelforge/mapper/FFM/pmappings.py +310 -0
  76. accelforge/mapper/__init__.py +4 -0
  77. accelforge/mapper.py +0 -0
  78. accelforge/model/__init__.py +1 -0
  79. accelforge/model/_looptree/__init__.py +0 -0
  80. accelforge/model/_looptree/accesses.py +335 -0
  81. accelforge/model/_looptree/capacity/__init__.py +1 -0
  82. accelforge/model/_looptree/capacity/aggregators.py +36 -0
  83. accelforge/model/_looptree/capacity/capacity.py +47 -0
  84. accelforge/model/_looptree/energy.py +150 -0
  85. accelforge/model/_looptree/equivalent_ranks.py +29 -0
  86. accelforge/model/_looptree/latency/__init__.py +1 -0
  87. accelforge/model/_looptree/latency/latency.py +98 -0
  88. accelforge/model/_looptree/latency/memory.py +120 -0
  89. accelforge/model/_looptree/latency/processors.py +92 -0
  90. accelforge/model/_looptree/mapping_utilities.py +71 -0
  91. accelforge/model/_looptree/reuse/__init__.py +4 -0
  92. accelforge/model/_looptree/reuse/isl/__init__.py +1 -0
  93. accelforge/model/_looptree/reuse/isl/des.py +59 -0
  94. accelforge/model/_looptree/reuse/isl/isl_functions.py +374 -0
  95. accelforge/model/_looptree/reuse/isl/mapping_to_isl/__init__.py +4 -0
  96. accelforge/model/_looptree/reuse/isl/mapping_to_isl/analyze_mapping.py +297 -0
  97. accelforge/model/_looptree/reuse/isl/mapping_to_isl/skews_from_mapping.py +236 -0
  98. accelforge/model/_looptree/reuse/isl/mapping_to_isl/tiling.py +685 -0
  99. accelforge/model/_looptree/reuse/isl/mapping_to_isl/types.py +188 -0
  100. accelforge/model/_looptree/reuse/isl/spatial.py +260 -0
  101. accelforge/model/_looptree/reuse/isl/temporal.py +182 -0
  102. accelforge/model/_looptree/reuse/symbolic/__init__.py +1 -0
  103. accelforge/model/_looptree/reuse/symbolic/symbolic copy 2.py +1346 -0
  104. accelforge/model/_looptree/reuse/symbolic/symbolic copy.py +1408 -0
  105. accelforge/model/_looptree/reuse/symbolic/symbolic.py +1396 -0
  106. accelforge/model/_looptree/run.py +122 -0
  107. accelforge/model/_looptree/types.py +26 -0
  108. accelforge/model/_looptree/visualization/__init__.py +0 -0
  109. accelforge/model/_looptree/visualization/occupancy.py +11 -0
  110. accelforge/model/main.py +222 -0
  111. accelforge/plotting/__init__.py +2 -0
  112. accelforge/plotting/mappings.py +219 -0
  113. accelforge/plotting/specs.py +57 -0
  114. accelforge/util/__init__.py +4 -0
  115. accelforge/util/_base_analysis_types.py +24 -0
  116. accelforge/util/_basetypes.py +1089 -0
  117. accelforge/util/_frozenset.py +36 -0
  118. accelforge/util/_isl.py +29 -0
  119. accelforge/util/_itertools.py +14 -0
  120. accelforge/util/_mathfuncs.py +57 -0
  121. accelforge/util/_parse_expressions.py +339 -0
  122. accelforge/util/_picklecache.py +32 -0
  123. accelforge/util/_setexpressions.py +268 -0
  124. accelforge/util/_sympy/__init__.py +0 -0
  125. accelforge/util/_sympy/broadcast_max.py +18 -0
  126. accelforge/util/_visualization.py +112 -0
  127. accelforge/util/_yaml.py +579 -0
  128. accelforge/util/parallel.py +193 -0
  129. accelforge-0.0.1.dist-info/METADATA +64 -0
  130. accelforge-0.0.1.dist-info/RECORD +258 -0
  131. accelforge-0.0.1.dist-info/WHEEL +5 -0
  132. accelforge-0.0.1.dist-info/licenses/LICENSE +19 -0
  133. accelforge-0.0.1.dist-info/top_level.txt +5 -0
  134. docs/_build/html/_sources/fastfusion.frontend.mapper.rst.txt +37 -0
  135. docs/_build/html/_sources/fastfusion.frontend.rst.txt +70 -0
  136. docs/_build/html/_sources/fastfusion.frontend.workload.rst.txt +21 -0
  137. docs/_build/html/_sources/fastfusion.mapper.FFM.rst.txt +37 -0
  138. docs/_build/html/_sources/fastfusion.mapper.rst.txt +18 -0
  139. docs/_build/html/_sources/fastfusion.rst.txt +20 -0
  140. docs/_build/html/_sources/fastfusion.util.rst.txt +21 -0
  141. docs/_build/html/_sources/index.rst.txt +87 -0
  142. docs/_build/html/_sources/modules.rst.txt +7 -0
  143. docs/_build/html/_sources/notes/citation.rst.txt +45 -0
  144. docs/_build/html/_sources/notes/definitions.rst.txt +43 -0
  145. docs/_build/html/_sources/notes/faqs.rst.txt +39 -0
  146. docs/_build/html/_sources/notes/modeling/accelerator_energy_latency.rst.txt +72 -0
  147. docs/_build/html/_sources/notes/modeling/component_energy_area.rst.txt +96 -0
  148. docs/_build/html/_sources/notes/modeling/mapping.rst.txt +100 -0
  149. docs/_build/html/_sources/notes/modeling.rst.txt +33 -0
  150. docs/_build/html/_sources/notes/parsing/arithmetic_parsing.rst.txt +136 -0
  151. docs/_build/html/_sources/notes/parsing/setexpressions.rst.txt +63 -0
  152. docs/_build/html/_sources/notes/parsing/yaml_parsing.rst.txt +176 -0
  153. docs/_build/html/_sources/notes/quickstart_and_installation.rst.txt +9 -0
  154. docs/_build/html/_sources/notes/spec/architecture.rst.txt +133 -0
  155. docs/_build/html/_sources/notes/spec/mapping.rst.txt +12 -0
  156. docs/_build/html/_sources/notes/spec/workload.rst.txt +83 -0
  157. docs/_build/html/_sources/notes/spec.rst.txt +36 -0
  158. docs/source/_ext/include_attrs.py +213 -0
  159. docs/source/_ext/include_docstring.py +364 -0
  160. docs/source/_ext/include_functions.py +154 -0
  161. docs/source/_ext/include_notebook.py +131 -0
  162. docs/source/_ext/include_yaml.py +119 -0
  163. docs/source/_ext/inherited_attributes.py +222 -0
  164. docs/source/_ext/paths.py +4 -0
  165. docs/source/conf.py +79 -0
  166. examples/arches/compute_in_memory/_include.yaml +74 -0
  167. examples/arches/compute_in_memory/_include_functions.py +229 -0
  168. examples/arches/compute_in_memory/_load_spec.py +57 -0
  169. examples/arches/compute_in_memory/components/c2c_multiplier.py +181 -0
  170. examples/arches/compute_in_memory/components/dac_c2c_r2r.py +605 -0
  171. examples/arches/compute_in_memory/components/misc.py +195 -0
  172. examples/arches/compute_in_memory/components/util/bit_functions.py +51 -0
  173. examples/arches/compute_in_memory/components/zero_comparator.py +92 -0
  174. examples/arches/compute_in_memory/isaac.yaml +233 -0
  175. examples/arches/compute_in_memory/memory_cells/ecram_demo.yaml +63 -0
  176. examples/arches/compute_in_memory/memory_cells/rram_example.yaml +63 -0
  177. examples/arches/compute_in_memory/memory_cells/rram_isaac_isca_2016.yaml +64 -0
  178. examples/arches/compute_in_memory/memory_cells/rram_neurosim_default.yaml +63 -0
  179. examples/arches/compute_in_memory/memory_cells/rram_raella_isca_2023.yaml +70 -0
  180. examples/arches/compute_in_memory/memory_cells/rram_wan_nature_2022.yaml +63 -0
  181. examples/arches/compute_in_memory/memory_cells/sram_colonnade_jssc_2021.yaml +63 -0
  182. examples/arches/compute_in_memory/memory_cells/sram_example.yaml +63 -0
  183. examples/arches/compute_in_memory/memory_cells/sram_jia_jssc_2020.yaml +63 -0
  184. examples/arches/compute_in_memory/memory_cells/sram_sinangil_jssc_2021.yaml +63 -0
  185. examples/arches/compute_in_memory/memory_cells/sram_wang_vlsi_2022.yaml +63 -0
  186. examples/arches/compute_in_memory/wang_vlsi_2022.yaml +289 -0
  187. examples/arches/eyeriss.yaml +68 -0
  188. examples/arches/fanout_variations/at_glb.yaml +31 -0
  189. examples/arches/fanout_variations/at_glb_with_fanout_node.yaml +34 -0
  190. examples/arches/fanout_variations/at_mac.yaml +31 -0
  191. examples/arches/fanout_variations/at_mac_with_constraints.yaml +38 -0
  192. examples/arches/fanout_variations/at_mac_with_fanout_node.yaml +34 -0
  193. examples/arches/nvdla.yaml +47 -0
  194. examples/arches/simple.yaml +28 -0
  195. examples/arches/tpu_v4i.yaml +67 -0
  196. examples/mappings/unfused_matmuls_to_simple.yaml +33 -0
  197. examples/misc/component_annotated.yaml +33 -0
  198. examples/workloads/gpt3_6.7B.yaml +124 -0
  199. examples/workloads/matmuls.yaml +20 -0
  200. examples/workloads/mobilenet_28.yaml +81 -0
  201. examples/workloads/mobilenet_various_separate.yaml +106 -0
  202. examples/workloads/three_matmuls_annotated.yaml +59 -0
  203. notebooks/.ipynb_checkpoints/fastfusion_arch_study_michael-checkpoint.ipynb +359 -0
  204. notebooks/compute_in_memory/_scripts.py +339 -0
  205. notebooks/compute_in_memory/isaac.guide.ipynb +270 -0
  206. notebooks/compute_in_memory/wang_vlsi_2022.ipynb +602 -0
  207. notebooks/paths.py +4 -0
  208. notebooks/tutorials/.ipynb_checkpoints/1_FFM-checkpoint.ipynb +3110 -0
  209. notebooks/tutorials/FFM.ipynb +3498 -0
  210. notebooks/tutorials/_include.py +48 -0
  211. notebooks/tutorials/component_energy_area.ipynb +363 -0
  212. tests/Q_mapping.yaml +38 -0
  213. tests/__init__.py +0 -0
  214. tests/conv.mapping.yaml +27 -0
  215. tests/conv.workload.yaml +13 -0
  216. tests/conv_sym.mapping.yaml +43 -0
  217. tests/copy.mapping.yaml +35 -0
  218. tests/copy.workload.yaml +15 -0
  219. tests/distribuffers/__init__.py +0 -0
  220. tests/distribuffers/multicast/test_cases.yaml +482 -0
  221. tests/distribuffers/spec/binding/valid_bindings.yaml +97 -0
  222. tests/distribuffers/spec/distributed.yaml +100 -0
  223. tests/distribuffers/spec/logical_arch.yaml +32 -0
  224. tests/distribuffers/spec/physical_arch.yaml +69 -0
  225. tests/distribuffers/test_binding.py +48 -0
  226. tests/frontend/__init__.py +0 -0
  227. tests/frontend/test_mapping_viz.py +52 -0
  228. tests/mapper/__init__.py +0 -0
  229. tests/mapper/configs/conv1d/conv1d.mapping.yaml +31 -0
  230. tests/mapper/configs/conv1d/conv1d.workload.yaml +11 -0
  231. tests/mapper/configs/two_conv1d/two_conv1d.expected.yaml +38 -0
  232. tests/mapper/configs/two_conv1d/two_conv1d.mapping.yaml +54 -0
  233. tests/mapper/configs/two_conv1d/two_conv1d.workload.yaml +19 -0
  234. tests/mapper/test_mapping_to_isl.py +90 -0
  235. tests/mapper/test_spatial_reuse_analysis.py +67 -0
  236. tests/mapper/test_temporal_reuse_analysis.py +56 -0
  237. tests/mapper/util.py +58 -0
  238. tests/matmul.mapping.yaml +29 -0
  239. tests/matmul.workload.yaml +12 -0
  240. tests/matmul_spatial.mapping.yaml +44 -0
  241. tests/mha.renames.yaml +65 -0
  242. tests/mha.workload.yaml +67 -0
  243. tests/mha.yaml +59 -0
  244. tests/mha_full.workload.yaml +67 -0
  245. tests/mobilenet.workload.yaml +35 -0
  246. tests/mobilenet_long.workload.yaml +64 -0
  247. tests/pmappingcache.py +24 -0
  248. tests/processing_stage.arch.yaml +40 -0
  249. tests/snowcat.arch.yaml +36 -0
  250. tests/test_ffm_join_pmappings.py +106 -0
  251. tests/test_ffm_make_pmappings.py +82 -0
  252. tests/test_ffm_make_tile_shapes.py +49 -0
  253. tests/test_mapper.py +100 -0
  254. tests/test_model.py +37 -0
  255. tests/test_plotting.py +72 -0
  256. tests/test_processing_stage.py +46 -0
  257. tests/test_symbolic_model.py +248 -0
  258. tests/test_workload.py +141 -0
docs/source/conf.py ADDED
@@ -0,0 +1,79 @@
1
# Sphinx configuration for the accelforge documentation build.
import os
import sys
sys.path.insert(0, os.path.abspath('_ext'))  # local Sphinx extensions below
sys.path.insert(0, os.path.abspath('../..')) # Make your repo importable

import locale
locale.setlocale(locale.LC_ALL, 'C.UTF-8')

# -- Project information -----------------------------------------------------
project = 'accelforge'
author = 'Tanner Andrulis, Michael Gilbert'
# NOTE(review): the distributed wheel is versioned 0.0.1 but release here says
# 0.1.0 -- confirm which is the intended published version.
release = '0.1.0'

# -- HTML output -------------------------------------------------------------
html_theme = 'sphinx_rtd_theme'
# html_theme = 'furo'
# html_theme = 'pydata_sphinx_theme'
# pip3 install sphinx-furo-theme

extensions = [
    'sphinx.ext.autodoc',  # Pull docstrings
    'sphinx.ext.napoleon',  # NumPy / Google style docstrings
    'sphinx.ext.autosummary',  # Generate autodoc summaries
    'sphinx.ext.viewcode',  # Add links to source code
    'sphinx_autodoc_typehints',  # Include type hints
    'sphinx.ext.intersphinx',  # Link to other projects' documentation
    'include_docstring',  # Include docstrings
    'include_notebook',  # Include notebooks
    'include_attrs',  # Include attributes & their docstrings
    'include_functions',  # Include functions & their docstrings
    'inherited_attributes',  # Inherit docstrings from parent classes
    'include_yaml',  # Include subsets of YAML files
    'sphinx_copybutton',  # Add copy button to code blocks
]

# Hide pydantic machinery from the generated API docs.
autodoc_default_options = {
    'members': True,
    'undoc-members': False,
    'exclude-members': 'model_config,model_fields,__pydantic_fields__,model_post_init',
}

# ---------- Autodoc settings ----------
# Show type hints inline in signatures
autodoc_typehints = "signature"
autodoc_typehints_format = "short"

# Preserve default values
autodoc_preserve_defaults = True

# Force multi-line for long constructor signatures (Sphinx 7+)
autodoc_class_signature = "separated"

# ---------- HTML CSS to wrap signatures ----------
# Create docs/source/_static/custom.css with:
# .signature {
#     white-space: pre-wrap !important;
#     word-break: break-word;
# }
# html_static_path = ["_static"]
# html_css_files = ["custom.css"]
# html_js_files = ["custom.js"]

# ---------- Optional: Napoleon settings ----------
# If using Google/NumPy style docstrings
napoleon_use_param = True
napoleon_use_rtype = True
napoleon_use_ivar = True

# Fail loudly on broken cross-references.
nitpicky = True

intersphinx_mapping = {
    'python': ('https://docs.python.org/3', None),
    'numpy': ('https://numpy.org/doc/stable/', None),
    'pandas': ('https://pandas.pydata.org/docs/', None),
    # 'matplotlib': ('https://matplotlib.org/stable/contents.html', None),
    'scipy': ('https://docs.scipy.org/doc/scipy/reference/', None),
    'scikit-learn': ('https://scikit-learn.org/stable/documentation.html', None),
    'hwcomponents': ('https://accelergy-project.github.io/hwcomponents/', None),
}
@@ -0,0 +1,74 @@
1
# Shared attribute bindings for compute-in-memory components. Values on the
# right are expression/variable names resolved elsewhere in the spec.
cim_component_attributes: &cim_component_attributes
  # These are for NeuroSim
  rows: array_wordlines
  cols: array_bitlines
  cols_active_at_once: array_parallel_outputs
  cell_config: cell_config
  average_input_value: average_input_value
  average_cell_value: average_weight_value
  voltage: voltage
  temporal_dac_bits: temporal_dac_resolution
  read_pulse_width: read_pulse_width

  # These are for the ADC plug-in. Set defaults for them
  # so that if the user does not specify, we won't get
  # an error.
  resolution: adc_resolution
  n_adcs: n_adc_per_bank
  width: encoded_output_bits

# These will be applied to the weight drivers
weight_drivers_attributes: &weight_drivers_attributes
  <<: *cim_component_attributes
  # NOTE(review): the keys from rows through temporal_dac_bits repeat values
  # already provided by the merge key above; only cycle_seconds, sequential,
  # and read_pulse_width actually differ -- confirm the repeats are intended.
  rows: array_wordlines
  cols: array_bitlines
  cols_active_at_once: array_parallel_outputs
  cell_config: cell_config
  average_input_value: average_input_value
  average_cell_value: average_weight_value
  cycle_seconds: cycle_period
  voltage: voltage
  temporal_dac_bits: temporal_dac_resolution
  sequential: True
  read_pulse_width: 0 # Irrelevant for weight programming

variables_global: &variables_global
  weight_bits: weight.bits_per_value
  input_bits: input.bits_per_value
  output_bits: output.bits_per_value
  array_parallel_inputs: get_array_fanout_reuse_output(spec)
  array_parallel_outputs: get_array_fanout_reuse_input(spec)
  array_parallel_weights: get_array_fanout_total(spec)
  array_wordlines: array_parallel_inputs * cim_unit_width_cells
  array_bitlines: array_parallel_outputs * cim_unit_depth_cells
  dac_resolution: max(voltage_dac_resolution, temporal_dac_resolution)
  cols_active_at_once: array_parallel_outputs

  # Calculate the number of slices needed to store the input and weight bits and
  # the number of bits in each slice
  in_b: encoded_input_bits # Shorthands so the following lines aren't super long
  w_b: encoded_weight_bits
  max_input_bits_per_slice: min(dac_resolution, in_b)
  max_weight_bits_per_slice: min(cim_unit_width_cells * bits_per_cell, w_b)

  average_input_bits_per_slice: encoded_input_bits / n_input_slices
  average_weight_bits_per_slice: encoded_weight_bits / n_weight_slices

  # This is for the bitwise-multiplication of the input and weight slices
  n_virtual_macs: max_input_bits_per_slice * max_weight_bits_per_slice * encoded_output_bits

  # Calculate statistics for input and weight values and bits after encoding
  ehtas: encoded_hist_to_avg_slice # Shorthands so the following lines aren't super long
  in_enc_fn: input_encoding_func
  w_enc_fn: weight_encoding_func
  average_input_value: ehtas(in_enc_fn(inputs_hist), in_b, max_input_bits_per_slice)
  average_weight_value: ehtas(w_enc_fn(weights_hist), w_b, max_weight_bits_per_slice)
  input_bit_distribution: ehtas(in_enc_fn(inputs_hist), in_b, 1, return_per_slice=True)
  weight_bit_distribution: ehtas(w_enc_fn(weights_hist), w_b, 1, return_per_slice=True)

  min_weight_slices: ceil(min_supported_weight_bits / bits_per_cell / cim_unit_width_cells)
  min_input_slices: ceil(min_supported_input_bits / max_input_bits_per_slice)

  n_input_slices: max(ceil(in_b / max_input_bits_per_slice), min_input_slices)
  n_weight_slices: max(ceil(w_b / max_weight_bits_per_slice), min_weight_slices)
  n_sliced_psums: n_input_slices * n_weight_slices
@@ -0,0 +1,229 @@
1
+ import accelforge as af
2
+
3
+
4
def get_array_fanout_reuse_input(spec: af.Spec) -> int:
    """Product of the ``array_reuse_input`` spatial fanouts over all arch leaves."""
    total = 1
    for leaf in spec.arch.get_nodes_of_type(af.arch.Leaf):
        if "array_reuse_input" not in leaf.spatial:
            continue
        fanout = leaf.spatial["array_reuse_input"]["fanout"]
        assert isinstance(fanout, (int, float)), f"fanout {leaf.name}.spatial.array_reuse_input.fanout is not a number"
        total *= fanout
    return total
12
+
13
+
14
def get_array_fanout_reuse_output(spec: af.Spec) -> int:
    """Product of the ``array_reuse_output`` spatial fanouts over all arch leaves."""
    total = 1
    for leaf in spec.arch.get_nodes_of_type(af.arch.Leaf):
        if "array_reuse_output" not in leaf.spatial:
            continue
        fanout = leaf.spatial["array_reuse_output"]["fanout"]
        assert isinstance(fanout, (int, float)), f"fanout {leaf.name}.spatial.array_reuse_output.fanout is not a number"
        total *= fanout
    return total
22
+
23
+
24
def get_array_fanout_total(spec: af.Spec) -> int:
    """Total array fanout: the input-reuse fanout times the output-reuse fanout."""
    input_reuse = get_array_fanout_reuse_input(spec)
    output_reuse = get_array_fanout_reuse_output(spec)
    return input_reuse * output_reuse
26
+
27
+
28
+ # Sign magnitude
29
+ # 1. Scale X to [-1, 1]: x = NORM_1_TO_NEG1(x, INPUTS_VALUE_DISTRIBUTION)
30
+ # 2. Convert to signed: x = abs(x) * (2 ** (INPUT_BITS - 1) - 1)
31
+ # 2. x = round(x * (2 ** INPUT_BITS - 1))
32
+
33
+ from math import log2
34
+ from typing import List, NamedTuple, Union
35
+
36
class ProbableBits(NamedTuple):
    """A bit pattern paired with the probability of it occurring.

    ``bits`` is a list of 0/1 ints, most-significant bit first (the order
    produced by ``bin()``); ``probability`` is the fraction of encoded values
    that yield this pattern.
    """
    # Tightened from bare `list` -- every producer in this module appends ints.
    bits: List[int]
    probability: float
39
+
40
+ # ==============================================================================
41
+ # Encoding functions
42
+ # ==============================================================================
43
+
44
+
45
def magnitude_encode_hist(weights) -> List[ProbableBits]:
    """
    A signed value is encoded as a positive or negative magnitude of that value.
    Signed hardware is required.
    """
    nbits = get_num_bits(weights)
    encoded = []
    halfwidth = len(weights) / 2
    for i, w in enumerate(weights):
        # Map bin index i to the signed value it represents, centered on zero.
        normed = norm(i, len(weights), -halfwidth + 0.5, halfwidth + 0.5)
        # Store only the magnitude: [1:] drops the leading bit of the
        # nbits-wide unsigned representation.
        encoded.append(ProbableBits(to_bits_unsigned(abs(normed), nbits)[1:], w))
    return norm_encoded_hist(encoded)
57
+
58
def two_part_magnitude_encode_hist(weights):
    """
    Two (devices, timesteps, components, etc.) encode each signed value: one
    encodes the magnitude of the value, the other encodes 0. Which device
    carries the magnitude depends on the sign of the value.
    """
    result = []
    for entry in magnitude_encode_hist(weights):
        half_prob = entry.probability / 2
        # Split each magnitude entry's probability evenly between the device
        # holding the magnitude and the device holding all zeros.
        result.append(ProbableBits(entry.bits, half_prob))
        result.append(ProbableBits([0] * len(entry.bits), half_prob))
    return result
71
+
72
def offset_encode_hist(weights):
    """
    A signed value is encoded as the value minus the negative minimum value.
    This maps a range of [-min, max] to [0, max - min]. The bias must be added
    back after computation.
    """
    nbits = get_num_bits(weights)
    encoded = []
    for i, w in enumerate(weights):
        # With the range [0, len) this norm() call is the identity: bin i
        # encodes the unsigned value i.
        normed = norm(i, len(weights), 0, len(weights))
        encoded.append(ProbableBits(to_bits_unsigned(normed, nbits), w))
    return norm_encoded_hist(encoded)
84
+
85
+
86
def offset_encode_if_signed_hist(weights):
    """
    Offset encode a value only if it is signed. Otherwise, don't apply any
    bias and just use the positive values.
    """
    encoder = offset_encode_hist if is_hist_signed(weights) else magnitude_encode_hist
    return encoder(weights)
94
+
95
+
96
def two_part_magnitude_encode_if_signed_hist(weights):
    """
    Two part magnitude encode a value only if it is signed. Otherwise, use only
    positive values.
    """
    if is_hist_signed(weights):
        return two_part_magnitude_encode_hist(weights)
    return magnitude_encode_hist(weights)
104
+
105
+
106
def xnor_encode_hist(weights):
    """
    XNOR encoding based on Jia JSSCC 2020.
    """
    nbits = get_num_bits(weights)
    encoded = []
    halfwidth = len(weights) / 2
    for i, w in enumerate(weights):
        # Signed value represented by bin i, centered on zero.
        normed = norm(i, len(weights), -halfwidth + 0.5, halfwidth + 0.5)
        bits = []
        # Greedy signed binary expansion: each bit contributes +2**j when 1
        # and -2**j when 0. The two extra j == -1 passes append two
        # half-weight bits; the assert below checks the residual reaches
        # exactly zero for every bin.
        for j in list(range(nbits - 1, -1, -1)) + [-1, -1]:
            bits.append(int(normed > 0))
            normed -= 2**j * (2 * bits[-1] - 1)
        assert normed == 0, f"normed={normed} is not 0"
        encoded.append(ProbableBits(bits, w))
    return norm_encoded_hist(encoded)
122
+
123
+
124
def zero_gated_xnor_encode_hist(weights):
    """
    XNOR encoding with zero gating based on Jia JSSCC 2020.
    """
    encoded = xnor_encode_hist(weights)
    middle = len(encoded) // 2
    # Gate the zero-valued (center) entry: keep its probability but replace
    # its bit pattern with all zeros.
    zeroed_bits = [0] * len(encoded[middle].bits)
    encoded[middle] = ProbableBits(zeroed_bits, encoded[middle].probability)
    return encoded
134
+
135
+ # ==============================================================================
136
+ # Helper functions
137
+ # ==============================================================================
138
+
139
def assert_hist_pow2_minus1(hist):
    """Raise AssertionError unless len(hist) is one less than a power of two."""
    size = len(hist)
    # size == 2**k - 1 exactly when size + 1 shares no set bits with size.
    assert (size + 1) & size == 0, (
        f"Histogram length {size} is not a power of 2 minus 1."
    )
146
+
147
+
148
def norm_encoded_hist(encoded_hist: List[ProbableBits]):
    """Rescale probabilities so they sum to 1, keeping bit patterns unchanged."""
    total = sum(entry.probability for entry in encoded_hist)
    return [
        ProbableBits(entry.bits, entry.probability / total)
        for entry in encoded_hist
    ]
151
+
152
+
153
def get_num_bits(hist):
    """Return n such that 2**n == len(hist) + 1, asserting that such n exists."""
    n_bins_plus_one = len(hist) + 1
    # bit_length() - 1 is floor(log2); the assert rejects non-power-of-2 sizes.
    n_bits = n_bins_plus_one.bit_length() - 1
    assert 2**n_bits == n_bins_plus_one, (
        f"Number of histogram bins + 1 must be a power of 2, got {len(hist)}."
    )
    return n_bits
161
+
162
+
163
def is_hist_signed(hist):
    """True when any probability mass sits in the lower (negative) half of hist."""
    negative_half = hist[: len(hist) // 2]
    return sum(negative_half) != 0
165
+
166
+
167
def hist_to_magnitude(hist):
    # Fold a signed-value histogram (odd length, zero bin at the center) into
    # a histogram over magnitudes by summing the +i and -i bins.
    assert_hist_pow2_minus1(hist)
    new_hist = [0] * (len(hist) // 2)
    hist_center = len(hist) // 2
    for i in range(len(new_hist)):
        # NOTE(review): at i == 0 this adds the center (zero) bin twice, and
        # because len(new_hist) == len(hist) // 2 the outermost bins hist[0]
        # and hist[-1] are never read -- confirm both are intentional.
        new_hist[i] = hist[hist_center + i] + hist[hist_center - i]
    assert_hist_pow2_minus1(new_hist)
    return new_hist
175
+
176
+
177
def to_bits_unsigned(x, nbits):
    """Round x and return its nbits-wide unsigned binary digits, MSB first."""
    value = round(x)
    assert 0 <= value < 2**nbits, f"x={value} is not in range [0, 2^{nbits})"
    # Extract bits from most to least significant.
    return [(value >> shift) & 1 for shift in range(nbits - 1, -1, -1)]
181
+
182
+
183
def norm(x, nbins, rmin, rmax):
    """Linearly map bin index x from [0, nbins) onto the range [rmin, rmax)."""
    fraction = x / nbins
    return fraction * (rmax - rmin) + rmin
185
+
186
+
187
def encoded_hist_to_avg_slice(
    encoded_hist: List[ProbableBits],
    total_bits: int,
    bits_per_slice: Union[list, int],
    partial_slices_use_full_range: bool = False,
    return_per_slice: bool = False,
):
    """
    Compute the probability-weighted average value held by each bit slice.

    Bits are partitioned into slices of `bits_per_slice` bits (an int is
    expanded into equal slices, with a smaller final slice for any remainder).
    Each bit's contribution is its positional weight within its slice, scaled
    to the slice's maximum representable value. Returns the per-slice averages
    when `return_per_slice` is True, otherwise their mean.
    """
    if isinstance(bits_per_slice, int):
        bits_per_slice = [bits_per_slice] * (total_bits // bits_per_slice)
        # Remainder bits form one final, smaller slice.
        if sum(bits_per_slice) != total_bits:
            bits_per_slice.append(total_bits - sum(bits_per_slice))

    assert total_bits == sum(bits_per_slice), (
        f"Sum of bits per slice {sum(bits_per_slice)} != total_bits " f"{total_bits}"
    )

    # bit2slice[i] = (slice index, scale) for global bit position i, MSB first
    # within each slice. The scale normalizes the bit's positional weight by
    # either the largest slice's max value or (if partial_slices_use_full_range)
    # this slice's own max value.
    bit2slice = []
    max_val = max(2 ** max(bits_per_slice) - 1, 1)
    for i, b in enumerate(bits_per_slice):
        m = max(2**b - 1, 1) if partial_slices_use_full_range else max_val
        bit2slice += [(i, max((2 ** (b - j - 1)), 1) / m) for j in range(b)]

    avg_slice_values = [0] * len(bits_per_slice)
    for e in encoded_hist:
        for i in range(total_bits):
            slice_idx, scale = bit2slice[i]
            if i >= len(e.bits):
                # Pattern is shorter than total_bits: substitute the pattern's
                # mean bit value for the missing positions.
                bit_value = sum(e.bits) / len(e.bits)
            else:
                bit_value = e.bits[i]
            avg_slice_values[slice_idx] += bit_value * e.probability * scale

    if return_per_slice:
        return avg_slice_values

    return sum(avg_slice_values) / len(avg_slice_values)
223
+
224
+
225
if __name__ == "__main__":
    # Quick manual check: encode a symmetric triangular distribution over 31
    # bins (5-bit signed values) and print each resulting ProbableBits entry.
    input_dist = [16 - abs(16 - i) for i in range(31)]
    print(f"input_dist: {input_dist}")
    for e in xnor_encode_hist(input_dist):
        print(e)
@@ -0,0 +1,57 @@
1
+ import accelforge as af
2
+ import os
3
+
4
+ THIS_SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
5
+ VARIABLES_GLOBAL_PATH = os.path.join(THIS_SCRIPT_DIR, "_include.yaml")
6
+
7
+
8
+ def get_spec(
9
+ arch_name: str,
10
+ compare_with_arch_name: str | None = None,
11
+ add_dummy_main_memory: bool = False,
12
+ ) -> af.Spec:
13
+ """
14
+ Gets the spec for the given architecture. If `compare_with_arch_name` is given, the
15
+ variables_iso will be grabbed from `compare_with_arch_name` in order to match
16
+ attributes for fair comparison.
17
+
18
+ Parameters
19
+ ----------
20
+ arch_name: str
21
+ The name of the architecture to get the spec for.
22
+ compare_with_arch_name: str | None
23
+ The name of the architecture to compare with. If not given, variables will be
24
+ taken from the given `arch_name`.
25
+
26
+ Returns
27
+ -------
28
+ spec: af.Spec
29
+ The spec for the given architecture.
30
+ """
31
+ if compare_with_arch_name is None:
32
+ compare_with_name = arch_name
33
+ else:
34
+ compare_with_name = compare_with_arch_name
35
+
36
+ arch_name = os.path.join(THIS_SCRIPT_DIR, f"{arch_name}.yaml")
37
+ compare_with_name = os.path.join(THIS_SCRIPT_DIR, f"{compare_with_name}.yaml")
38
+ variables = af.Variables.from_yaml(arch_name, top_key="variables")
39
+ arch = af.Arch.from_yaml(arch_name, top_key="arch")
40
+ workload = af.Workload.from_yaml(arch_name, top_key="workload")
41
+ spec = af.Spec(arch=arch, variables=variables, workload=workload)
42
+
43
+ spec.config.expression_custom_functions.append(
44
+ os.path.join(THIS_SCRIPT_DIR, "_include_functions.py")
45
+ )
46
+ spec.config.component_models.append(
47
+ os.path.join(THIS_SCRIPT_DIR, "components/*.py")
48
+ )
49
+ if add_dummy_main_memory:
50
+ main_memory = af.arch.Memory(
51
+ name="MainMemory",
52
+ component_class="Dummy",
53
+ size=float("inf"),
54
+ tensors={"keep": "~weight"}
55
+ )
56
+ spec.arch.nodes.insert(0, main_memory)
57
+ return spec
@@ -0,0 +1,181 @@
1
+ from hwcomponents.scaling import linear
2
+ from hwcomponents_neurosim import NOTGate
3
+ from hwcomponents import ComponentModel, action
4
+ from misc import Capacitor
5
+
6
+
7
class C2CMultiplier(ComponentModel):
    """
    The C2C multiplier looks like the following:

    - For operand A as an analog voltage
    - Operand B is a binary digital value with bits B0, B1, B2... from least to most
      significant

    The circuit looks like:

        2C      2C         2C         2C         2C         2C
    G──||───┰──||──────┰──||──────┰──||──────┰──||──────┰──||──── -> OUT
          = C        = C        = C        = C        = C
            │          │          │          │          │
            ╲─── B0    ╲─── B1    ╲─── B2    ╲─── B3    ╲─── B4
            │ G        │ G        │ G        │ G        │ G
    A──────┴──────────┴──────────┴──────────┴──────────┴─────────

    Energy is consumed when: 1. A increases, and all the B capacitors are charged 2. Any
    B bit goes 0->1, and the corresponding capacitor is charged

    USAGE: In your architecture, initialize both a C2CMultiplier and a
    C2CMultiplierPortB. Have the "a" port process the analog operand and have the "b"
    port process the digital operand.

    The C2CMultiplier component has area accounted for. The C2CMultiplierPortB component
    does not have any area!

    Parameters
    ----------
    resolution: int
        The resolution of the multiplier.
    voltage: float
        The voltage of the multiplier in volts.
    unit_capacitance: float
        The unit capacitance of the multiplier in Farads.
    a_hist: list[float]
        The histogram of the analog operand's values. This is a histogram of the values,
        assumed to be spaced between 0 and voltage, inclusive.
    b_bit_distribution: list[float]
        The distribution of the binary operand's bits. Each is a probability of a given
        bit being 1.
    tech_node: str
        The tech node of the multiplier in meters.
    """

    # Model-selection priority within the component-model registry
    # (NOTE(review): exact semantics defined by hwcomponents — confirm).
    priority = 0.5

    def __init__(
        self,
        resolution: int,
        voltage: float,
        unit_capacitance: float,
        a_hist: list[float],
        b_bit_distribution: list[float],
        tech_node: str,
    ):
        self.voltage = voltage
        self.unit_capacitance = unit_capacitance
        self.a_hist = a_hist
        self.b_bit_distribution = b_bit_distribution
        self.tech_node = tech_node

        # One unit capacitor (the "C" to ground) and one double-unit capacitor
        # (the "2C" in the series ladder) model the per-bit cells.
        self.unit_cap = Capacitor(
            capacitance=unit_capacitance,
            voltage=voltage,
            tech_node=tech_node,
        )
        self.unit2_cap = Capacitor(
            capacitance=unit_capacitance * 2,
            voltage=voltage,
            tech_node=tech_node,
        )
        self.inverter = NOTGate(tech_node=self.tech_node, cycle_period=1e-9)

        # RMS of the analog operand: treat a_hist bin indices as values, take the
        # probability-weighted root-mean-square, then rescale bin index -> volts
        # (bins assumed evenly spaced over [0, voltage], per the docstring).
        a_rms = (sum(i**2 * p for i, p in enumerate(a_hist)) / sum(a_hist)) ** 0.5
        self.a_rms = a_rms * voltage / (len(a_hist) - 1)

        if not all(0 <= p <= 1 for p in b_bit_distribution):
            raise ValueError("Bit probabilities must be between 0 and 1")
        # Probability that an average bit transitions 0->1: for an independent
        # bit with P(1)=p, P(0 then 1) = p*(1-p); averaged over all bits.
        self.b_lo2hi_probability = sum(p * (1 - p) for p in b_bit_distribution) / len(
            b_bit_distribution
        )

        # Pass gates are 2 transistors, 100F^2 each
        # NOTE(review): tech_node is annotated `str` but squared here — it is
        # presumably a numeric feature size in meters; confirm the annotation.
        control_pass_gate_area = 2 * self.tech_node**2 * 100
        cap_area = self.unit_cap.area + self.unit2_cap.area
        inverter_area = self.inverter.area

        # Assume pass gates don't leak
        inverter_leak = self.inverter.leak_power
        cap_leak = self.unit_cap.leak_power + self.unit2_cap.leak_power

        super().__init__(
            area=cap_area + inverter_area + control_pass_gate_area,
            leak_power=cap_leak + inverter_leak,
        )

        # Register linear scaling of area/energy/leakage with resolution against
        # a reference resolution of 1. Note this overwrites the raw `resolution`
        # argument with the value returned by scale().
        self.resolution: float = self.scale(
            "resolution",
            resolution,
            1,
            area_scale_function=linear,
            energy_scale_function=linear,
            latency_scale_function=None,
            leak_power_scale_function=linear,
        )

    @action
    def switch_a(self):
        """
        Charge all capacitors to the values in a_hist.
        """
        # Count energy by just charging one of the capacitors and multiplying by the
        # number of bits.
        # NOTE(review): `+=` presumably operates on an EnergyLatency-like object
        # returned by switch() (a plain tuple would concatenate and break the
        # unpacking below) — confirm against the Capacitor model.
        energy_latency = self.unit_cap.switch(self.a_hist)
        energy_latency += self.unit2_cap.switch(self.a_hist)

        # The reference node sees a cap of unit_capacitance * 1.67 / resolution per bit
        # on average assuming a uniform-ish distribution of bits
        energy, latency = energy_latency
        energy *= 1.67 / self.resolution

        return energy, latency

    @action
    def switch_b(self):
        """
        Connect capacitors to A with probability b_lo2hi_probability.
        """
        # Raise both per-bit capacitors to the RMS analog voltage, then weight
        # by how often a bit actually transitions 0->1.
        energy_latency = self.unit_cap.raise_voltage_to(self.a_rms)
        energy_latency += self.unit2_cap.raise_voltage_to(self.a_rms)
        energy, latency = energy_latency
        energy *= self.b_lo2hi_probability
        return energy, latency

    @action
    def read(self):
        """
        Returns the energy and latency to send a value through the multiplier's analog
        port. If you are only using the read() action, then also initialize a
        C2CMultiplierPortB to have it process the digital operand with the read()
        action.
        """
        return self.switch_a()
152
+
153
+
154
class C2CMultiplierPortB(C2CMultiplier):
    """
    Digital-operand (B) port companion to C2CMultiplier.

    All constructor parameters are forwarded unchanged to C2CMultiplier so both
    ports share the same electrical model. The only differences are that this
    component contributes zero area (the paired C2CMultiplier accounts for all
    of it) and that read() charges through the B port rather than the A port.
    """

    def __init__(
        self,
        resolution: int,
        voltage: float,
        unit_capacitance: float,
        a_hist: list[float],
        b_bit_distribution: list[float],
        tech_node: str,
    ):
        # Forward every parameter to the A-port model unchanged.
        shared = dict(
            resolution=resolution,
            voltage=voltage,
            unit_capacitance=unit_capacitance,
            a_hist=a_hist,
            b_bit_distribution=b_bit_distribution,
            tech_node=tech_node,
        )
        super().__init__(**shared)
        # Area is reported by the paired C2CMultiplier; report none here.
        self.area_scale = 0

    @action
    def read(self):
        """
        Returns the energy and latency to send a value through the multiplier's digital
        port. If you are only using the read() action, then also initialize a
        C2CMultiplier to have it process the analog operand with the read() action.
        """
        return self.switch_b()