accelforge 0.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (258) hide show
  1. accelforge/__init__.py +21 -0
  2. accelforge/_accelerated_imports.py +16 -0
  3. accelforge/_deprecate/_simanneal/evalmapping.py +271 -0
  4. accelforge/_deprecate/_simanneal/mapspaceglobals.py +298 -0
  5. accelforge/_deprecate/_simanneal/simanneal.py +666 -0
  6. accelforge/_deprecate/_simanneal/tracking.py +105 -0
  7. accelforge/_deprecate/_simanneal/wrappers.py +218 -0
  8. accelforge/_deprecate/_simanneal2/__init__.py +7 -0
  9. accelforge/_deprecate/_simanneal2/simanneal.py +493 -0
  10. accelforge/_deprecate/_simanneal2/tracking.py +116 -0
  11. accelforge/_deprecate/compatibility_util.py +181 -0
  12. accelforge/_deprecate/layerdeduplication/__init__.py +2 -0
  13. accelforge/_deprecate/layerdeduplication/group_similar_einsums.py +160 -0
  14. accelforge/_deprecate/layerdeduplication/grouped_einsums.py +84 -0
  15. accelforge/_deprecate/mapping_filter_tags/__init__.py +2 -0
  16. accelforge/_deprecate/mapping_filter_tags/ffmt.py +212 -0
  17. accelforge/_deprecate/mapping_filter_tags/onesplit.py +24 -0
  18. accelforge/_deprecate/mapping_filter_tags/util.py +24 -0
  19. accelforge/_deprecate/tags.py +69 -0
  20. accelforge/_deprecate/viz/__init__.py +0 -0
  21. accelforge/_deprecate/viz/interactive.py +159 -0
  22. accelforge/_deprecate/viz/reservationtree.py +307 -0
  23. accelforge/_deprecate/viz/ski_slope.py +88 -0
  24. accelforge/_version.py +15 -0
  25. accelforge/examples.py +39 -0
  26. accelforge/frontend/__init__.py +10 -0
  27. accelforge/frontend/_binding.py +129 -0
  28. accelforge/frontend/_workload_isl/__init__.py +2 -0
  29. accelforge/frontend/_workload_isl/_isl.py +149 -0
  30. accelforge/frontend/_workload_isl/_symbolic.py +141 -0
  31. accelforge/frontend/arch copy.py +1544 -0
  32. accelforge/frontend/arch.py +1642 -0
  33. accelforge/frontend/config.py +63 -0
  34. accelforge/frontend/mapper/__init__.py +5 -0
  35. accelforge/frontend/mapper/ffm.py +126 -0
  36. accelforge/frontend/mapper/mapper.py +7 -0
  37. accelforge/frontend/mapper/metrics.py +30 -0
  38. accelforge/frontend/mapping/__init__.py +1 -0
  39. accelforge/frontend/mapping/mapping.py +1736 -0
  40. accelforge/frontend/model.py +14 -0
  41. accelforge/frontend/renames.py +150 -0
  42. accelforge/frontend/spec copy.py +230 -0
  43. accelforge/frontend/spec.py +301 -0
  44. accelforge/frontend/variables.py +12 -0
  45. accelforge/frontend/workload.py +952 -0
  46. accelforge/mapper/FFM/__init__.py +9 -0
  47. accelforge/mapper/FFM/_join_pmappings/__init__.py +0 -0
  48. accelforge/mapper/FFM/_join_pmappings/compatibility.py +653 -0
  49. accelforge/mapper/FFM/_join_pmappings/compress_pmappings.py +140 -0
  50. accelforge/mapper/FFM/_join_pmappings/join_pmappings.py +703 -0
  51. accelforge/mapper/FFM/_join_pmappings/pmapping_dataframe.py +901 -0
  52. accelforge/mapper/FFM/_join_pmappings/pmapping_group.py +337 -0
  53. accelforge/mapper/FFM/_make_pmappings/contraints/__init__.py +0 -0
  54. accelforge/mapper/FFM/_make_pmappings/contraints/constraints.py +360 -0
  55. accelforge/mapper/FFM/_make_pmappings/make_pmapping_templates/__init__.py +1 -0
  56. accelforge/mapper/FFM/_make_pmappings/make_pmapping_templates/make_loops.py +373 -0
  57. accelforge/mapper/FFM/_make_pmappings/make_pmapping_templates/make_pmapping_templates.py +463 -0
  58. accelforge/mapper/FFM/_make_pmappings/make_pmapping_templates/make_reservations.py +95 -0
  59. accelforge/mapper/FFM/_make_pmappings/make_pmapping_templates/make_storage_order.py +382 -0
  60. accelforge/mapper/FFM/_make_pmappings/make_pmapping_templates/make_storages.py +155 -0
  61. accelforge/mapper/FFM/_make_pmappings/make_pmappings.py +411 -0
  62. accelforge/mapper/FFM/_make_pmappings/make_pmappings_from_templates/__init__.py +1 -0
  63. accelforge/mapper/FFM/_make_pmappings/make_pmappings_from_templates/make_pmappings_from_templates.py +407 -0
  64. accelforge/mapper/FFM/_make_pmappings/make_pmappings_from_templates/make_tile_shapes.py +1681 -0
  65. accelforge/mapper/FFM/_make_pmappings/make_pmappings_from_templates/run_model.py +170 -0
  66. accelforge/mapper/FFM/_make_pmappings/make_pmappings_from_templates/symbol_relations.py +174 -0
  67. accelforge/mapper/FFM/_make_pmappings/pmapper_job.py +282 -0
  68. accelforge/mapper/FFM/_pareto_df/df_convention.py +273 -0
  69. accelforge/mapper/FFM/_pareto_df/pareto copy.py +836 -0
  70. accelforge/mapper/FFM/_pareto_df/pareto.py +508 -0
  71. accelforge/mapper/FFM/data.py +61 -0
  72. accelforge/mapper/FFM/main copy.py +236 -0
  73. accelforge/mapper/FFM/main.py +208 -0
  74. accelforge/mapper/FFM/mappings.py +510 -0
  75. accelforge/mapper/FFM/pmappings.py +310 -0
  76. accelforge/mapper/__init__.py +4 -0
  77. accelforge/mapper.py +0 -0
  78. accelforge/model/__init__.py +1 -0
  79. accelforge/model/_looptree/__init__.py +0 -0
  80. accelforge/model/_looptree/accesses.py +335 -0
  81. accelforge/model/_looptree/capacity/__init__.py +1 -0
  82. accelforge/model/_looptree/capacity/aggregators.py +36 -0
  83. accelforge/model/_looptree/capacity/capacity.py +47 -0
  84. accelforge/model/_looptree/energy.py +150 -0
  85. accelforge/model/_looptree/equivalent_ranks.py +29 -0
  86. accelforge/model/_looptree/latency/__init__.py +1 -0
  87. accelforge/model/_looptree/latency/latency.py +98 -0
  88. accelforge/model/_looptree/latency/memory.py +120 -0
  89. accelforge/model/_looptree/latency/processors.py +92 -0
  90. accelforge/model/_looptree/mapping_utilities.py +71 -0
  91. accelforge/model/_looptree/reuse/__init__.py +4 -0
  92. accelforge/model/_looptree/reuse/isl/__init__.py +1 -0
  93. accelforge/model/_looptree/reuse/isl/des.py +59 -0
  94. accelforge/model/_looptree/reuse/isl/isl_functions.py +374 -0
  95. accelforge/model/_looptree/reuse/isl/mapping_to_isl/__init__.py +4 -0
  96. accelforge/model/_looptree/reuse/isl/mapping_to_isl/analyze_mapping.py +297 -0
  97. accelforge/model/_looptree/reuse/isl/mapping_to_isl/skews_from_mapping.py +236 -0
  98. accelforge/model/_looptree/reuse/isl/mapping_to_isl/tiling.py +685 -0
  99. accelforge/model/_looptree/reuse/isl/mapping_to_isl/types.py +188 -0
  100. accelforge/model/_looptree/reuse/isl/spatial.py +260 -0
  101. accelforge/model/_looptree/reuse/isl/temporal.py +182 -0
  102. accelforge/model/_looptree/reuse/symbolic/__init__.py +1 -0
  103. accelforge/model/_looptree/reuse/symbolic/symbolic copy 2.py +1346 -0
  104. accelforge/model/_looptree/reuse/symbolic/symbolic copy.py +1408 -0
  105. accelforge/model/_looptree/reuse/symbolic/symbolic.py +1396 -0
  106. accelforge/model/_looptree/run.py +122 -0
  107. accelforge/model/_looptree/types.py +26 -0
  108. accelforge/model/_looptree/visualization/__init__.py +0 -0
  109. accelforge/model/_looptree/visualization/occupancy.py +11 -0
  110. accelforge/model/main.py +222 -0
  111. accelforge/plotting/__init__.py +2 -0
  112. accelforge/plotting/mappings.py +219 -0
  113. accelforge/plotting/specs.py +57 -0
  114. accelforge/util/__init__.py +4 -0
  115. accelforge/util/_base_analysis_types.py +24 -0
  116. accelforge/util/_basetypes.py +1089 -0
  117. accelforge/util/_frozenset.py +36 -0
  118. accelforge/util/_isl.py +29 -0
  119. accelforge/util/_itertools.py +14 -0
  120. accelforge/util/_mathfuncs.py +57 -0
  121. accelforge/util/_parse_expressions.py +339 -0
  122. accelforge/util/_picklecache.py +32 -0
  123. accelforge/util/_setexpressions.py +268 -0
  124. accelforge/util/_sympy/__init__.py +0 -0
  125. accelforge/util/_sympy/broadcast_max.py +18 -0
  126. accelforge/util/_visualization.py +112 -0
  127. accelforge/util/_yaml.py +579 -0
  128. accelforge/util/parallel.py +193 -0
  129. accelforge-0.0.1.dist-info/METADATA +64 -0
  130. accelforge-0.0.1.dist-info/RECORD +258 -0
  131. accelforge-0.0.1.dist-info/WHEEL +5 -0
  132. accelforge-0.0.1.dist-info/licenses/LICENSE +19 -0
  133. accelforge-0.0.1.dist-info/top_level.txt +5 -0
  134. docs/_build/html/_sources/fastfusion.frontend.mapper.rst.txt +37 -0
  135. docs/_build/html/_sources/fastfusion.frontend.rst.txt +70 -0
  136. docs/_build/html/_sources/fastfusion.frontend.workload.rst.txt +21 -0
  137. docs/_build/html/_sources/fastfusion.mapper.FFM.rst.txt +37 -0
  138. docs/_build/html/_sources/fastfusion.mapper.rst.txt +18 -0
  139. docs/_build/html/_sources/fastfusion.rst.txt +20 -0
  140. docs/_build/html/_sources/fastfusion.util.rst.txt +21 -0
  141. docs/_build/html/_sources/index.rst.txt +87 -0
  142. docs/_build/html/_sources/modules.rst.txt +7 -0
  143. docs/_build/html/_sources/notes/citation.rst.txt +45 -0
  144. docs/_build/html/_sources/notes/definitions.rst.txt +43 -0
  145. docs/_build/html/_sources/notes/faqs.rst.txt +39 -0
  146. docs/_build/html/_sources/notes/modeling/accelerator_energy_latency.rst.txt +72 -0
  147. docs/_build/html/_sources/notes/modeling/component_energy_area.rst.txt +96 -0
  148. docs/_build/html/_sources/notes/modeling/mapping.rst.txt +100 -0
  149. docs/_build/html/_sources/notes/modeling.rst.txt +33 -0
  150. docs/_build/html/_sources/notes/parsing/arithmetic_parsing.rst.txt +136 -0
  151. docs/_build/html/_sources/notes/parsing/setexpressions.rst.txt +63 -0
  152. docs/_build/html/_sources/notes/parsing/yaml_parsing.rst.txt +176 -0
  153. docs/_build/html/_sources/notes/quickstart_and_installation.rst.txt +9 -0
  154. docs/_build/html/_sources/notes/spec/architecture.rst.txt +133 -0
  155. docs/_build/html/_sources/notes/spec/mapping.rst.txt +12 -0
  156. docs/_build/html/_sources/notes/spec/workload.rst.txt +83 -0
  157. docs/_build/html/_sources/notes/spec.rst.txt +36 -0
  158. docs/source/_ext/include_attrs.py +213 -0
  159. docs/source/_ext/include_docstring.py +364 -0
  160. docs/source/_ext/include_functions.py +154 -0
  161. docs/source/_ext/include_notebook.py +131 -0
  162. docs/source/_ext/include_yaml.py +119 -0
  163. docs/source/_ext/inherited_attributes.py +222 -0
  164. docs/source/_ext/paths.py +4 -0
  165. docs/source/conf.py +79 -0
  166. examples/arches/compute_in_memory/_include.yaml +74 -0
  167. examples/arches/compute_in_memory/_include_functions.py +229 -0
  168. examples/arches/compute_in_memory/_load_spec.py +57 -0
  169. examples/arches/compute_in_memory/components/c2c_multiplier.py +181 -0
  170. examples/arches/compute_in_memory/components/dac_c2c_r2r.py +605 -0
  171. examples/arches/compute_in_memory/components/misc.py +195 -0
  172. examples/arches/compute_in_memory/components/util/bit_functions.py +51 -0
  173. examples/arches/compute_in_memory/components/zero_comparator.py +92 -0
  174. examples/arches/compute_in_memory/isaac.yaml +233 -0
  175. examples/arches/compute_in_memory/memory_cells/ecram_demo.yaml +63 -0
  176. examples/arches/compute_in_memory/memory_cells/rram_example.yaml +63 -0
  177. examples/arches/compute_in_memory/memory_cells/rram_isaac_isca_2016.yaml +64 -0
  178. examples/arches/compute_in_memory/memory_cells/rram_neurosim_default.yaml +63 -0
  179. examples/arches/compute_in_memory/memory_cells/rram_raella_isca_2023.yaml +70 -0
  180. examples/arches/compute_in_memory/memory_cells/rram_wan_nature_2022.yaml +63 -0
  181. examples/arches/compute_in_memory/memory_cells/sram_colonnade_jssc_2021.yaml +63 -0
  182. examples/arches/compute_in_memory/memory_cells/sram_example.yaml +63 -0
  183. examples/arches/compute_in_memory/memory_cells/sram_jia_jssc_2020.yaml +63 -0
  184. examples/arches/compute_in_memory/memory_cells/sram_sinangil_jssc_2021.yaml +63 -0
  185. examples/arches/compute_in_memory/memory_cells/sram_wang_vlsi_2022.yaml +63 -0
  186. examples/arches/compute_in_memory/wang_vlsi_2022.yaml +289 -0
  187. examples/arches/eyeriss.yaml +68 -0
  188. examples/arches/fanout_variations/at_glb.yaml +31 -0
  189. examples/arches/fanout_variations/at_glb_with_fanout_node.yaml +34 -0
  190. examples/arches/fanout_variations/at_mac.yaml +31 -0
  191. examples/arches/fanout_variations/at_mac_with_constraints.yaml +38 -0
  192. examples/arches/fanout_variations/at_mac_with_fanout_node.yaml +34 -0
  193. examples/arches/nvdla.yaml +47 -0
  194. examples/arches/simple.yaml +28 -0
  195. examples/arches/tpu_v4i.yaml +67 -0
  196. examples/mappings/unfused_matmuls_to_simple.yaml +33 -0
  197. examples/misc/component_annotated.yaml +33 -0
  198. examples/workloads/gpt3_6.7B.yaml +124 -0
  199. examples/workloads/matmuls.yaml +20 -0
  200. examples/workloads/mobilenet_28.yaml +81 -0
  201. examples/workloads/mobilenet_various_separate.yaml +106 -0
  202. examples/workloads/three_matmuls_annotated.yaml +59 -0
  203. notebooks/.ipynb_checkpoints/fastfusion_arch_study_michael-checkpoint.ipynb +359 -0
  204. notebooks/compute_in_memory/_scripts.py +339 -0
  205. notebooks/compute_in_memory/isaac.guide.ipynb +270 -0
  206. notebooks/compute_in_memory/wang_vlsi_2022.ipynb +602 -0
  207. notebooks/paths.py +4 -0
  208. notebooks/tutorials/.ipynb_checkpoints/1_FFM-checkpoint.ipynb +3110 -0
  209. notebooks/tutorials/FFM.ipynb +3498 -0
  210. notebooks/tutorials/_include.py +48 -0
  211. notebooks/tutorials/component_energy_area.ipynb +363 -0
  212. tests/Q_mapping.yaml +38 -0
  213. tests/__init__.py +0 -0
  214. tests/conv.mapping.yaml +27 -0
  215. tests/conv.workload.yaml +13 -0
  216. tests/conv_sym.mapping.yaml +43 -0
  217. tests/copy.mapping.yaml +35 -0
  218. tests/copy.workload.yaml +15 -0
  219. tests/distribuffers/__init__.py +0 -0
  220. tests/distribuffers/multicast/test_cases.yaml +482 -0
  221. tests/distribuffers/spec/binding/valid_bindings.yaml +97 -0
  222. tests/distribuffers/spec/distributed.yaml +100 -0
  223. tests/distribuffers/spec/logical_arch.yaml +32 -0
  224. tests/distribuffers/spec/physical_arch.yaml +69 -0
  225. tests/distribuffers/test_binding.py +48 -0
  226. tests/frontend/__init__.py +0 -0
  227. tests/frontend/test_mapping_viz.py +52 -0
  228. tests/mapper/__init__.py +0 -0
  229. tests/mapper/configs/conv1d/conv1d.mapping.yaml +31 -0
  230. tests/mapper/configs/conv1d/conv1d.workload.yaml +11 -0
  231. tests/mapper/configs/two_conv1d/two_conv1d.expected.yaml +38 -0
  232. tests/mapper/configs/two_conv1d/two_conv1d.mapping.yaml +54 -0
  233. tests/mapper/configs/two_conv1d/two_conv1d.workload.yaml +19 -0
  234. tests/mapper/test_mapping_to_isl.py +90 -0
  235. tests/mapper/test_spatial_reuse_analysis.py +67 -0
  236. tests/mapper/test_temporal_reuse_analysis.py +56 -0
  237. tests/mapper/util.py +58 -0
  238. tests/matmul.mapping.yaml +29 -0
  239. tests/matmul.workload.yaml +12 -0
  240. tests/matmul_spatial.mapping.yaml +44 -0
  241. tests/mha.renames.yaml +65 -0
  242. tests/mha.workload.yaml +67 -0
  243. tests/mha.yaml +59 -0
  244. tests/mha_full.workload.yaml +67 -0
  245. tests/mobilenet.workload.yaml +35 -0
  246. tests/mobilenet_long.workload.yaml +64 -0
  247. tests/pmappingcache.py +24 -0
  248. tests/processing_stage.arch.yaml +40 -0
  249. tests/snowcat.arch.yaml +36 -0
  250. tests/test_ffm_join_pmappings.py +106 -0
  251. tests/test_ffm_make_pmappings.py +82 -0
  252. tests/test_ffm_make_tile_shapes.py +49 -0
  253. tests/test_mapper.py +100 -0
  254. tests/test_model.py +37 -0
  255. tests/test_plotting.py +72 -0
  256. tests/test_processing_stage.py +46 -0
  257. tests/test_symbolic_model.py +248 -0
  258. tests/test_workload.py +141 -0
@@ -0,0 +1,70 @@
1
+ fastfusion.frontend package
2
+ ===========================
3
+
4
+ Subpackages
5
+ -----------
6
+
7
+ .. toctree::
8
+ :maxdepth: 4
9
+
10
+ fastfusion.frontend.mapper
11
+ fastfusion.frontend.workload
12
+
13
+ Submodules
14
+ ----------
15
+
16
+ fastfusion.frontend.arch module
17
+ -------------------------------
18
+
19
+ .. automodule:: fastfusion.frontend.arch
20
+ :members:
21
+ :show-inheritance:
22
+ :undoc-members:
23
+
24
+ fastfusion.frontend.config module
25
+ ---------------------------------
26
+
27
+ .. automodule:: fastfusion.frontend.config
28
+ :members:
29
+ :show-inheritance:
30
+ :undoc-members:
31
+
32
+ fastfusion.frontend.mapping module
33
+ ----------------------------------
34
+
35
+ .. automodule:: fastfusion.frontend.mapping
36
+ :members:
37
+ :show-inheritance:
38
+ :undoc-members:
39
+
40
+ fastfusion.frontend.renames module
41
+ ----------------------------------
42
+
43
+ .. automodule:: fastfusion.frontend.renames
44
+ :members:
45
+ :show-inheritance:
46
+ :undoc-members:
47
+
48
+ fastfusion.frontend.spec module
49
+ -------------------------------
50
+
51
+ .. automodule:: fastfusion.frontend.spec
52
+ :members:
53
+ :show-inheritance:
54
+ :undoc-members:
55
+
56
+ fastfusion.frontend.variables module
57
+ ------------------------------------
58
+
59
+ .. automodule:: fastfusion.frontend.variables
60
+ :members:
61
+ :show-inheritance:
62
+ :undoc-members:
63
+
64
+ Module contents
65
+ ---------------
66
+
67
+ .. automodule:: fastfusion.frontend
68
+ :members:
69
+ :show-inheritance:
70
+ :undoc-members:
@@ -0,0 +1,21 @@
1
+ fastfusion.frontend.workload package
2
+ ====================================
3
+
4
+ Submodules
5
+ ----------
6
+
7
+ fastfusion.frontend.workload.workload module
8
+ --------------------------------------------
9
+
10
+ .. automodule:: fastfusion.frontend.workload.workload
11
+ :members:
12
+ :show-inheritance:
13
+ :undoc-members:
14
+
15
+ Module contents
16
+ ---------------
17
+
18
+ .. automodule:: fastfusion.frontend.workload
19
+ :members:
20
+ :show-inheritance:
21
+ :undoc-members:
@@ -0,0 +1,37 @@
1
+ fastfusion.mapper.FFM package
2
+ =============================
3
+
4
+ Submodules
5
+ ----------
6
+
7
+ fastfusion.mapper.FFM.main module
8
+ ---------------------------------
9
+
10
+ .. automodule:: fastfusion.mapper.FFM.main
11
+ :members:
12
+ :show-inheritance:
13
+ :undoc-members:
14
+
15
+ fastfusion.mapper.FFM.mappings module
16
+ -------------------------------------
17
+
18
+ .. automodule:: fastfusion.mapper.FFM.mappings
19
+ :members:
20
+ :show-inheritance:
21
+ :undoc-members:
22
+
23
+ fastfusion.mapper.FFM.pmappings module
24
+ --------------------------------------
25
+
26
+ .. automodule:: fastfusion.mapper.FFM.pmappings
27
+ :members:
28
+ :show-inheritance:
29
+ :undoc-members:
30
+
31
+ Module contents
32
+ ---------------
33
+
34
+ .. automodule:: fastfusion.mapper.FFM
35
+ :members:
36
+ :show-inheritance:
37
+ :undoc-members:
@@ -0,0 +1,18 @@
1
+ fastfusion.mapper package
2
+ =========================
3
+
4
+ Subpackages
5
+ -----------
6
+
7
+ .. toctree::
8
+ :maxdepth: 4
9
+
10
+ fastfusion.mapper.FFM
11
+
12
+ Module contents
13
+ ---------------
14
+
15
+ .. automodule:: fastfusion.mapper
16
+ :members:
17
+ :show-inheritance:
18
+ :undoc-members:
@@ -0,0 +1,20 @@
1
+ fastfusion package
2
+ ==================
3
+
4
+ Subpackages
5
+ -----------
6
+
7
+ .. toctree::
8
+ :maxdepth: 4
9
+
10
+ fastfusion.frontend
11
+ fastfusion.mapper
12
+ fastfusion.util
13
+
14
+ Module contents
15
+ ---------------
16
+
17
+ .. automodule:: fastfusion
18
+ :members:
19
+ :show-inheritance:
20
+ :undoc-members:
@@ -0,0 +1,21 @@
1
+ fastfusion.util package
2
+ =======================
3
+
4
+ Submodules
5
+ ----------
6
+
7
+ fastfusion.util.parallel module
8
+ -------------------------------
9
+
10
+ .. automodule:: fastfusion.util.parallel
11
+ :members:
12
+ :show-inheritance:
13
+ :undoc-members:
14
+
15
+ Module contents
16
+ ---------------
17
+
18
+ .. automodule:: fastfusion.util
19
+ :members:
20
+ :show-inheritance:
21
+ :undoc-members:
@@ -0,0 +1,87 @@
1
+ FastFusion
2
+ ==========
3
+
4
+ FastFusion is a framework to model tensor algebra accelerators. It includes flexible,
5
+ user-defined specifications for components, architectures, and workloads, and, given
6
+ these specifications, quickly finds optimal fused mappings to program the workloads onto
7
+ the architectures.
8
+
9
+ FastFusion is based on multiple other projects. If you use FastFusion in your work,
10
+ please refer to :doc:`notes/citation` for how to cite the relevant projects.
11
+
12
+
13
+ This page includes the following:
14
+
15
+ .. contents::
16
+ :depth: 1
17
+ :local:
18
+ :backlinks: none
19
+
20
+ Installation
21
+ ------------
22
+
23
+ For native installation, install the package from PyPI:
24
+
25
+ .. code-block:: bash
26
+
27
+ pip install fastfusion
28
+
29
+ Examples
30
+ --------
31
+
32
+ Example notebooks can be found by cloning the repository and navigating to the
33
+ ``notebooks/examples`` directory.
34
+
35
+ .. code-block:: bash
36
+
37
+ git clone https://github.com/Accelergy-Project/fastfusion.git
38
+ cd fastfusion/notebooks/examples
39
+ jupyter notebook
40
+
41
+ Additionally, example input files can be found in the ``examples`` directory.
42
+
43
+ .. code-block:: bash
44
+
45
+ git clone https://github.com/Accelergy-Project/fastfusion.git
46
+ cd fastfusion/examples
47
+ ls
48
+
49
+
50
+ Documentation Overview
51
+ ----------------------
52
+
53
+ Documentation is organized into the following sections:
54
+
55
+ - :doc:`Input Specifications <notes/spec>` - Overview of the inputs to fastfusion,
56
+ including specifications of architectures, workloads, and mappings.
57
+ - :doc:`Modeling <notes/modeling>` - How FastFusion models the energy, area, and latency
58
+ of an accelerator running a workload.
59
+ - :doc:`Citation <notes/citation>` - How to cite FastFusion in your work
60
+ - :doc:`Definitions <notes/definitions>` - Definitions of key concepts in FastFusion
61
+ - :doc:`Parsing <notes/parsing>` - Parsing of input specifications
62
+ - :doc:`Frequently Asked Questions <notes/faqs>` - Frequently asked questions about FastFusion
63
+
64
+ API Reference
65
+ -------------
66
+
67
+ The complete API reference is available in the :doc:`modules` section, which includes:
68
+
69
+ - :doc:`fastfusion.frontend <fastfusion.frontend>` - The input specifications for fastfusion
70
+ - :doc:`fastfusion.mapper <fastfusion.mapper>` - Algorithms that map workloads onto architectures
71
+ - :doc:`fastfusion.util <fastfusion.util>` - Utility functions and helpers
72
+
73
+ For detailed API documentation, see the :doc:`modules` section.
74
+
75
+ .. toctree::
76
+ :maxdepth: 2
77
+ :caption: API Reference
78
+ :hidden:
79
+
80
+ modules
81
+
82
+ .. toctree::
83
+ :maxdepth: 1
84
+ :caption: Documentation
85
+ :glob:
86
+
87
+ notes/*
@@ -0,0 +1,7 @@
1
+ fastfusion
2
+ ==========
3
+
4
+ .. toctree::
5
+ :maxdepth: 4
6
+
7
+ fastfusion
@@ -0,0 +1,45 @@
1
+ Citing This Work
2
+ ================
3
+
4
+ **Please cite all of the following papers if you use this work.** This work is the
5
+ combination of the following:
6
+
7
+ - **CiMLoop**: The architecture and component specification.
8
+ - **Fast & Fusiest**: The multi-Einsum mapper.
9
+ - **LoopTree**: The mapping specification.
10
+ - **LoopForest**: The mapspace specification.
11
+ - **Turbo-Charged**: The single-Einsum mapper (and an essential first step for Fast &
12
+ Fusiest).
13
+
14
+ They are available as the following:
15
+
16
+ .. code-block:: latex
17
+
18
+ \cite{cimloop, fast_fusiest, turbo_charged, loop_tree, loopforest}
19
+
20
+ .. code-block:: bibtex
21
+
22
+ @INPROCEEDINGS{cimloop,
23
+ author={Andrulis, Tanner and Emer, Joel S. and Sze, Vivienne},
24
+ booktitle={2024 IEEE International Symposium on Performance Analysis of Systems and Software (ISPASS)},
25
+ title={CiMLoop: A Flexible, Accurate, and Fast Compute-In-Memory Modeling Tool},
26
+ year={2024},
27
+ volume={},
28
+ number={},
29
+ pages={10-23},
30
+ keywords={Performance evaluation;Accuracy;Computational modeling;Computer architecture;Artificial neural networks;In-memory computing;Data models;Compute-In-Memory;Processing-In-Memory;Analog;Deep Neural Networks;Systems;Hardware;Modeling;Open-Source},
31
+ doi={10.1109/ISPASS61541.2024.00012}}
32
+
33
+ @INPROCEEDINGS{loop_tree,
34
+ author={Gilbert, Michael and Wu, Yannan Nellie and Parashar, Angshuman and Sze, Vivienne and Emer, Joel S.},
35
+ booktitle={2023 IEEE International Symposium on Performance Analysis of Systems and Software (ISPASS)},
36
+ title={LoopTree: Enabling Exploration of Fused-layer Dataflow Accelerators},
37
+ year={2023},
38
+ volume={},
39
+ number={},
40
+ pages={316-318},
41
+ keywords={Deep learning;Analytical models;Systematics;Neural networks;Bandwidth;Software;Energy efficiency;analytical modeling;layer fusion;accelerators},
42
+ doi={10.1109/ISPASS57527.2023.00038}}
43
+
44
+
45
+ TODO: More citations
@@ -0,0 +1,43 @@
1
+ Definitions
2
+ ===========
3
+
4
+ Action
5
+ An action is something performed by a hardware unit. For example, a read or a compute.
6
+
7
+ Mapping
8
+ A *mapping* is a schedule that maps operations and data movement onto the hardware.
9
+
10
+ Component
11
+ A component is a hardware unit in the architecture. For example, a memory or a compute
12
+ unit.
13
+
14
+ Dataflow
15
+ The order in which a mapping iterates over tiles, noting that tiles may be abstract
16
+ before the mapping is fully defined. :ref:`Tile`.
17
+
18
+ Dataplacement
19
+ Which tile(s) are stored in each memory level of the accelerator, and for what time
20
+ period, noting that tiles and time periods may be abstract before the mapping is fully
21
+ defined. :ref:`Tile`.
22
+
23
+ Pmapping
24
+ A *partial mapping*, or *pmapping*, is a mapping of a subset of the workload to the
25
+ hardware.
26
+
27
+ Pmapping Template
28
+ A *pmapping template* is a template for a pmapping. It includes all storage nodes
29
+ (dataplacement) and loop nodes (dataflow), but does not have loop bounds defined (tile
30
+ shapes).
31
+
32
+ Reuse
33
+ Reuse occurs when a piece of data is used in multiple computations, but fetched
34
+ fewer times from some memory. For example, we may fetch a piece of data from DRAM to
35
+ on-chip memory once, then use it in ten computations. This would incur nine reuses of
36
+ the piece of data.
37
+
38
+ Reuse Opportunity
39
+ Reuse opportunity is when a piece of data is used multiple times by the workload. It
40
+ may or may not be turned into reuse if the hardware successfully leverages it.
41
+
42
+ Tile
43
+ TODO
@@ -0,0 +1,39 @@
1
+ Frequently Asked Questions
2
+ ==========================
3
+
4
+ .. contents::
5
+ :depth: 1
6
+ :local:
7
+ :backlinks: none
8
+
9
+ What unit is ... specified in?
10
+ ------------------------------
11
+ We use un-prefixed units for all values. Joules, seconds, meters, square meters, bits,
12
+ etc.
13
+
14
+ Why are some attributes underscored?
15
+ ------------------------------------
16
+
17
+ .. _underscore-discussion:
18
+
19
+ Underscore prefixes are used to indicate that a value is recognized by the frontend.
20
+ They are used in places where there may be a mix of recognized and unrecognized values,
21
+ such as in a :py:class:`~fastfusion.frontend.arch.Component` ``attributes`` dictionary,
22
+ where ``attributes`` may contain recognized fields (such as
23
+ :py:obj:`~fastfusion.frontend.arch.ComponentAttributes.energy`) and
24
+ unrecognized fields (a field that may be used by `hwcomponents
25
+ <https://github.com/Accelergy-Project/hwcomponents>`_, but not this package).
26
+
27
+ When a value is underscored, this package will check whether it is recognized and raise
28
+ an error if it is not. In places where fields may or may not be recognized (e.g.,
29
+ :py:class:`~fastfusion.frontend.arch.ComponentAttributes`,
30
+ :py:class:`~fastfusion.frontend.arch.ActionArguments`), we recommend
31
+ underscore-prefixing all fields that are going to be used by this package.
32
+
33
+ As a result, you may see attributes dictionaries that have a mix of underscored and
34
+ non-underscored fields. The underscored fields will be used by this package, and the
35
+ non-underscored fields will only be used by other parsers of the object (such as
36
+ `hwcomponents <https://github.com/Accelergy-Project/hwcomponents>`_).
37
+
38
+ When an object is initialized with underscore-prefixed fields, all underscores are
39
+ dropped after checking validity.
@@ -0,0 +1,72 @@
1
+ Accelerator Energy, Area, and Latency
2
+ =====================================
3
+
4
+ .. _accelerator-energy-latency:
5
+
6
+ To calculate energy and latency, we first need to look at the number of actions incurred
7
+ by each :py:class:`~fastfusion.frontend.arch.Component` in the architecture.
8
+
9
+ Calculating Number of Actions from a Mapping
10
+ --------------------------------------------
11
+
12
+ .. _calculating-num-actions:
13
+
14
+ Except for :py:class:`~fastfusion.frontend.arch.Compute` components (whose number of
15
+ compute actions, barring recomputation, depends only on workload), the number of actions
16
+ incurred by most :py:class:`~fastfusion.frontend.arch.Component`\ s depends on the
17
+ component type, the workload, and the mapping.
18
+
19
+ For :py:class:`~fastfusion.frontend.arch.Memory` and
20
+ :py:class:`~fastfusion.frontend.arch.ProcessingStage` components, the number of actions
21
+ depends on the number of accesses to the component. They may be accessed in two ways:
22
+
23
+ - ``read``: The component is read from a lower-level component, or output values are read
24
+ up to a higher-level component.
25
+ - ``write``: The component is written to a lower-level component, or input values are
26
+ written from a higher-level component.
27
+
28
+ The number of actions incurred by accesses for each tensor is equal to the number of
29
+ values accessed times the datawidth of the tensor (determined by that component's
30
+ :py:class:`~fastfusion.frontend.arch.TensorHolderAttributes`), divided by the
31
+ :py:class:`~fastfusion.frontend.arch.ActionArguments` ``bits_per_action`` attribute. For
32
+ example, if 1024 values are accessed with a datawidth of 16 bits and ``bits_per_action``
33
+ is 32, then 1024 * 16 / 32 = 512 actions are incurred.
34
+
35
+ Read+Modify+Writes (RMWs) to a component are counted as a read and a write. The first
36
+ read of output data is skipped because the value has not been written yet.
37
+
38
+ By default, the ``datawidth`` and ``bits_per_action`` attributes are set to 1.
39
+ Generally, it works to leave these as 1. For example:
40
+
41
+ - If ``bits_per_action`` is 1, then each action accesses one bit, so we can define
42
+ actions in terms of bits accessed
43
+ - If ``datawidth`` is 1 and ``bits_per_action`` is 1, then each action accesses one
44
+ value, so we can define actions in terms of values accessed. Additionally, ``size``
45
+ will then be in terms of number of values that can be held, rather than number of
46
+ bits.
47
+
48
+ The latter case is the default, and you may often see ``datawidth`` and
49
+ ``bits_per_action`` un-set, ``size`` set to the number of values in the tensor, and
50
+ actions defined in terms of values accessed rather than bits.
51
+
52
+
53
+ Calculating Latency from a Pmapping
54
+ -----------------------------------
55
+
56
+ The :py:obj:`~fastfusion.frontend.arch.ComponentAttributes.latency` of a component, defined
57
+ in the class's ``attributes.latency`` field, is a Python expression that is evaluated
58
+ using the component's actions.
59
+
60
+ The :py:obj:`~fastfusion.frontend.arch.ComponentAttributes.latency` field is
61
+ :docstring-lower:`fastfusion.frontend.arch.ComponentAttributes.latency`
62
+
63
+
64
+ Calculating Area and Leak Power
65
+ -------------------------------
66
+
67
+ After :ref:`component modeling <component-modeling>` is completed, we can get area with the
68
+ :py:meth:`~fastfusion.frontend.arch.Arch.per_component_total_area` and
69
+ :py:meth:`~fastfusion.frontend.arch.Arch.total_area` methods. Similarly, we can get
70
+ leak power with the
71
+ :py:meth:`~fastfusion.frontend.arch.Arch.per_component_total_leak_power` and
72
+ :py:meth:`~fastfusion.frontend.arch.Arch.total_leak_power` methods.
@@ -0,0 +1,96 @@
1
+ Component Energy and Area
2
+ =========================
3
+
4
+ .. _component-modeling:
5
+
6
+ The energy and area of components in the :ref:`architecture <architecture>` can either be
7
+ specified directly, or by calls to the `HWComponents
8
+ <https://github.com/Accelergy-Project/hwcomponents>`_ library.
9
+
10
+ Calculating Energy and Area
11
+ ---------------------------
12
+
13
+ Component energy and area calculations will populate the following fields for each
14
+ component. If these fields are pre-specified, then they may be used as input to the
15
+ energy and area calculations.
16
+
17
+ - ``attributes.area``: :docstring:`fastfusion.frontend.arch.Component.attributes.area`
18
+ - ``attributes.leak_power``: :docstring:`fastfusion.frontend.arch.Component.attributes.leak_power`
19
+ - ``actions[<action name>].arguments.energy``: :docstring:`fastfusion.frontend.arch.ActionArguments.energy`
20
+ - ``attributes.total_area``: :docstring:`fastfusion.frontend.arch.Component.attributes.total_area`
21
+ - ``attributes.total_leak_power``: :docstring:`fastfusion.frontend.arch.Component.attributes.total_leak_power`
22
+ - ``energy_area_log``: :docstring:`fastfusion.frontend.arch.Component.energy_area_log`
23
+ - ``component_model``: :docstring:`fastfusion.frontend.arch.Component.component_model`
24
+
25
+ Additionally, the following fields will affect the energy and area calculations:
26
+
27
+ - ``attributes.energy``: :docstring:`fastfusion.frontend.arch.Component.attributes.energy`
28
+ - ``attributes.energy_scale``: :docstring:`fastfusion.frontend.arch.Component.attributes.energy_scale`
29
+ - ``attributes.leak_power_scale``: :docstring:`fastfusion.frontend.arch.Component.attributes.leak_power_scale`
30
+ - ``attributes.area_scale``: :docstring:`fastfusion.frontend.arch.Component.attributes.area_scale`
31
+ - ``actions[<action name>].arguments.energy_scale``: :docstring:`fastfusion.frontend.arch.ActionArguments.energy_scale`
32
+
33
+ The energy and area of all components in the architecture can be calculated by calling
34
+ :py:meth:`~fastfusion.spec.Spec.calculate_component_energy_area`.
35
+
36
+ .. include-notebook:: notebooks/tutorials/component_energy_area.ipynb
37
+ :name: spec_energy_area
38
+ :language: python
39
+
40
+ We can also calculate the energy and area of individual components by calling
41
+ :py:meth:`~fastfusion.arch.Component.calculate_energy_area` on them.
42
+
43
+ .. include-notebook:: notebooks/tutorials/component_energy_area.ipynb
44
+ :name: single_component_energy_area
45
+ :language: python
46
+
47
+ There are additional ``Spec.config`` fields that affect the energy and area
48
+ calculations:
49
+
50
+ .. include-attrs:: fastfusion.frontend.config.Config
51
+
52
+ Specifying Energy and Area
53
+ ---------------------------
54
+
55
+ One way to specify the area and energy of each component is to directly set the
56
+ ``attributes.area``, ``attributes.leak_power``, or ``actions[<action
57
+ name>].arguments.energy`` fields. The following example from the TPU v4i example
58
+ architecture uses this approach:
59
+
60
+ .. include-yaml:: examples/arches/tpu_v4i_like.arch.yaml
61
+ :startfrom: GlobalBuffer
62
+ :same-indent:
63
+
64
+ If any value is omitted, it will raise an appropriate error when
65
+ :py:meth:`~fastfusion.spec.Spec.calculate_component_energy_area` is called, so you may call this
66
+ function to check whether you've missed anything. ``hwcomponents`` is invoked
67
+ automatically if any of the fields are missing. If you don't want it to be called, then
68
+ you can do one of the following:
69
+
70
+ - If calling :py:meth:`~fastfusion.spec.Spec.calculate_component_energy_area`, then you
71
+ can set ``spec.config.component_models`` and
72
+ ``spec.config.use_installed_component_models`` to an empty list and ``False``,
73
+ respectively.
74
+ - If calling :py:meth:`~fastfusion.arch.Component.calculate_energy_area`, then you can set ``models`` to an
75
+ empty list.
76
+
77
+
78
+ Using the ``hwcomponents`` Library
79
+ -----------------------------------
80
+
81
+ ``hwcomponents`` is invoked automatically when area and energy are not specified. The
82
+ following shows the fields used by ``hwcomponents``:
83
+
84
+ .. include:: ../../../examples/misc/component_annotated.yaml
85
+ :code: yaml
86
+
87
+ When ``hwcomponents`` has been used to calculate the energy and area of a component,
88
+ then the ``component_model`` field will be set to the ``hwcomponents`` model used to
89
+ calculate the energy and area.
90
+
91
+ In addition to looking at the ``energy_area_log`` field, we can further inspect the
92
+ ``component_model`` field to see more information about the model.
93
+
94
+ .. include-notebook:: notebooks/tutorials/component_energy_area.ipynb
95
+ :name: hwcomponents
96
+ :language: python
@@ -0,0 +1,100 @@
1
+ Mapping with Fast & Fusiest
2
+ ===========================
3
+
4
+ Mapping workloads onto accelerators uses the Fast and Fusiest Mapper (FFM), which
5
+ consists of two parts:
6
+
7
+ - The Turbo-Charged Pmapper: This part makes all Pareto-optimal pmappings for all
8
+ Einsums.
9
+ - Fast and Fusiest Mapper (FFM): This part takes the Pareto-optimal pmappings and joins
10
+ them into full mappings.
11
+
12
+ This document will walk you through how to use FFM to map a workload onto an
13
+ accelerator.
14
+
15
+ This document follows the ``notebooks/tutorials/FFM.ipynb`` notebook.
16
+
17
+ Creating a Spec
18
+ ------------------------
19
+
20
+ Before we dive into the mapper, we need to set up a
21
+ :py:class:`~fastfusion.frontend.spec.Spec` object with the input
22
+ spec. We can initialize
23
+ :py:class:`~fastfusion.frontend.spec.Spec` objects from YAML files.
24
+
25
+ .. include-notebook:: notebooks/tutorials/FFM.ipynb
26
+ :name: make_spec
27
+ :language: python
28
+
29
+ We can set optimization metrics for the mapper by setting the ``spec.mapper.ffm.metrics``
30
+ attribute to one of the :py:class:`~fastfusion.mapper.FFM.Metrics` enum values or a
31
+ bitwise OR (``|``) of multiple values.
32
+
33
+ The following optimization metrics are available:
34
+
35
+ .. include-attrs:: fastfusion.mapper.FFM.Metrics
36
+
37
+ Making Partial Mappings
38
+ -----------------------
39
+
40
+ We call the Turbo-Charged Pmapper with the
41
+ :py:func:`~fastfusion.mapper.FFM.main.make_pmappings` function. This function returns a
42
+ :py:class:`~fastfusion.mapper.FFM.main.MultiEinsumPmappings` object, which contains all
43
+ Pareto-optimal pmappings for all Einsums.
44
+
45
+ .. include-notebook:: notebooks/tutorials/FFM.ipynb
46
+ :name: make_pmappings
47
+ :language: python
48
+
49
+ In this code, there is a ``max_fused_loops`` parameter that makes mapping faster by
50
+ limiting the number of fused loops that can exist in a single pmapping. The
51
+ ``spec.mapper.ffm`` object has a variety of knobs that can be used to speed up mapping:
52
+
53
+ .. include-attrs:: fastfusion.frontend.mapper.FFM
54
+
55
+ To help with debugging, the :py:func:`~fastfusion.mapper.FFM.main.make_pmappings`
56
+ function will output all pmapping templates that it generates. A pmapping template is a
57
+ pmapping that has not been filled in with tile shapes, meaning that it is a stack of
58
+ loop nodes and storage nodes with loop bounds left unfilled.
59
+
60
+ If no valid pmappings are found for a given Einsum, it may be helpful to inspect the
61
+ pmapping templates outputted. The
62
+ :py:class:`~fastfusion.mapper.FFM.pmappings.MultiEinsumPmappings` object has additional
63
+ functions that can be used to help with debugging:
64
+
65
+ .. include-functions:: fastfusion.mapper.FFM.pmappings.MultiEinsumPmappings
66
+
67
+ Joining Partial Mappings
68
+ ------------------------
69
+
70
+ After we have all Pareto-optimal pmappings for all Einsums, we can join them into full
71
+ mappings with the :py:func:`~fastfusion.mapper.FFM.main.join_pmappings` function. This
72
+ function returns a :py:class:`~fastfusion.mapper.FFM.mappings.Mappings` object, which
73
+ contains all Pareto-optimal mappings found for the given cascade of Einsums.
74
+
75
+ .. include-notebook:: notebooks/tutorials/FFM.ipynb
76
+ :name: join_pmappings
77
+ :language: python
78
+
79
+ Interpreting the Output
80
+ -----------------------
81
+
82
+ The :py:class:`~fastfusion.mapper.FFM.mappings.Mappings` object includes stats for the
83
+ mappings that were found, including, for each pmapping, resource usage and objective
84
+ metrics.
85
+
86
+ To access the stats, we can use the :py:meth:`~fastfusion.mapper.FFM.mappings.Mappings.access`
87
+ method, which will return a :py:class:`~fastfusion.mapper.FFM.mappings.Mappings` object
88
+ with only the columns that match the given key, and with the key removed from the column
89
+ names.
90
+
91
+ For example, if there are three columns ``Total<SEP>Energy``, ``Total<SEP>Area``, and
92
+ ``EinsumA<SEP>Energy``, then ``mapping.access("Total")`` will return a Mappings object
93
+ with columns ``Energy`` and ``Area``, and ``mapping.access("Energy")`` will return a
94
+ Mappings object with columns ``Total`` and ``EinsumA``.
95
+
96
+ To render a mapping, we can use the
97
+ :py:meth:`~fastfusion.mapper.FFM.mappings.Mappings.render` method, which will return a
98
+ string representation of the mapping. In a Jupyter notebook, the mapping will render
99
+ automatically if it is the last object in the cell. Note that if there is more than one
100
+ Pareto-optimal mapping, you must index into a single mapping to render it.