accelforge 0.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of accelforge might be problematic. Click here for more details.

Files changed (258) hide show
  1. accelforge/__init__.py +21 -0
  2. accelforge/_accelerated_imports.py +16 -0
  3. accelforge/_deprecate/_simanneal/evalmapping.py +271 -0
  4. accelforge/_deprecate/_simanneal/mapspaceglobals.py +298 -0
  5. accelforge/_deprecate/_simanneal/simanneal.py +666 -0
  6. accelforge/_deprecate/_simanneal/tracking.py +105 -0
  7. accelforge/_deprecate/_simanneal/wrappers.py +218 -0
  8. accelforge/_deprecate/_simanneal2/__init__.py +7 -0
  9. accelforge/_deprecate/_simanneal2/simanneal.py +493 -0
  10. accelforge/_deprecate/_simanneal2/tracking.py +116 -0
  11. accelforge/_deprecate/compatibility_util.py +181 -0
  12. accelforge/_deprecate/layerdeduplication/__init__.py +2 -0
  13. accelforge/_deprecate/layerdeduplication/group_similar_einsums.py +160 -0
  14. accelforge/_deprecate/layerdeduplication/grouped_einsums.py +84 -0
  15. accelforge/_deprecate/mapping_filter_tags/__init__.py +2 -0
  16. accelforge/_deprecate/mapping_filter_tags/ffmt.py +212 -0
  17. accelforge/_deprecate/mapping_filter_tags/onesplit.py +24 -0
  18. accelforge/_deprecate/mapping_filter_tags/util.py +24 -0
  19. accelforge/_deprecate/tags.py +69 -0
  20. accelforge/_deprecate/viz/__init__.py +0 -0
  21. accelforge/_deprecate/viz/interactive.py +159 -0
  22. accelforge/_deprecate/viz/reservationtree.py +307 -0
  23. accelforge/_deprecate/viz/ski_slope.py +88 -0
  24. accelforge/_version.py +15 -0
  25. accelforge/examples.py +39 -0
  26. accelforge/frontend/__init__.py +10 -0
  27. accelforge/frontend/_binding.py +129 -0
  28. accelforge/frontend/_workload_isl/__init__.py +2 -0
  29. accelforge/frontend/_workload_isl/_isl.py +149 -0
  30. accelforge/frontend/_workload_isl/_symbolic.py +141 -0
  31. accelforge/frontend/arch copy.py +1544 -0
  32. accelforge/frontend/arch.py +1642 -0
  33. accelforge/frontend/config.py +63 -0
  34. accelforge/frontend/mapper/__init__.py +5 -0
  35. accelforge/frontend/mapper/ffm.py +126 -0
  36. accelforge/frontend/mapper/mapper.py +7 -0
  37. accelforge/frontend/mapper/metrics.py +30 -0
  38. accelforge/frontend/mapping/__init__.py +1 -0
  39. accelforge/frontend/mapping/mapping.py +1736 -0
  40. accelforge/frontend/model.py +14 -0
  41. accelforge/frontend/renames.py +150 -0
  42. accelforge/frontend/spec copy.py +230 -0
  43. accelforge/frontend/spec.py +301 -0
  44. accelforge/frontend/variables.py +12 -0
  45. accelforge/frontend/workload.py +952 -0
  46. accelforge/mapper/FFM/__init__.py +9 -0
  47. accelforge/mapper/FFM/_join_pmappings/__init__.py +0 -0
  48. accelforge/mapper/FFM/_join_pmappings/compatibility.py +653 -0
  49. accelforge/mapper/FFM/_join_pmappings/compress_pmappings.py +140 -0
  50. accelforge/mapper/FFM/_join_pmappings/join_pmappings.py +703 -0
  51. accelforge/mapper/FFM/_join_pmappings/pmapping_dataframe.py +901 -0
  52. accelforge/mapper/FFM/_join_pmappings/pmapping_group.py +337 -0
  53. accelforge/mapper/FFM/_make_pmappings/contraints/__init__.py +0 -0
  54. accelforge/mapper/FFM/_make_pmappings/contraints/constraints.py +360 -0
  55. accelforge/mapper/FFM/_make_pmappings/make_pmapping_templates/__init__.py +1 -0
  56. accelforge/mapper/FFM/_make_pmappings/make_pmapping_templates/make_loops.py +373 -0
  57. accelforge/mapper/FFM/_make_pmappings/make_pmapping_templates/make_pmapping_templates.py +463 -0
  58. accelforge/mapper/FFM/_make_pmappings/make_pmapping_templates/make_reservations.py +95 -0
  59. accelforge/mapper/FFM/_make_pmappings/make_pmapping_templates/make_storage_order.py +382 -0
  60. accelforge/mapper/FFM/_make_pmappings/make_pmapping_templates/make_storages.py +155 -0
  61. accelforge/mapper/FFM/_make_pmappings/make_pmappings.py +411 -0
  62. accelforge/mapper/FFM/_make_pmappings/make_pmappings_from_templates/__init__.py +1 -0
  63. accelforge/mapper/FFM/_make_pmappings/make_pmappings_from_templates/make_pmappings_from_templates.py +407 -0
  64. accelforge/mapper/FFM/_make_pmappings/make_pmappings_from_templates/make_tile_shapes.py +1681 -0
  65. accelforge/mapper/FFM/_make_pmappings/make_pmappings_from_templates/run_model.py +170 -0
  66. accelforge/mapper/FFM/_make_pmappings/make_pmappings_from_templates/symbol_relations.py +174 -0
  67. accelforge/mapper/FFM/_make_pmappings/pmapper_job.py +282 -0
  68. accelforge/mapper/FFM/_pareto_df/df_convention.py +273 -0
  69. accelforge/mapper/FFM/_pareto_df/pareto copy.py +836 -0
  70. accelforge/mapper/FFM/_pareto_df/pareto.py +508 -0
  71. accelforge/mapper/FFM/data.py +61 -0
  72. accelforge/mapper/FFM/main copy.py +236 -0
  73. accelforge/mapper/FFM/main.py +208 -0
  74. accelforge/mapper/FFM/mappings.py +510 -0
  75. accelforge/mapper/FFM/pmappings.py +310 -0
  76. accelforge/mapper/__init__.py +4 -0
  77. accelforge/mapper.py +0 -0
  78. accelforge/model/__init__.py +1 -0
  79. accelforge/model/_looptree/__init__.py +0 -0
  80. accelforge/model/_looptree/accesses.py +335 -0
  81. accelforge/model/_looptree/capacity/__init__.py +1 -0
  82. accelforge/model/_looptree/capacity/aggregators.py +36 -0
  83. accelforge/model/_looptree/capacity/capacity.py +47 -0
  84. accelforge/model/_looptree/energy.py +150 -0
  85. accelforge/model/_looptree/equivalent_ranks.py +29 -0
  86. accelforge/model/_looptree/latency/__init__.py +1 -0
  87. accelforge/model/_looptree/latency/latency.py +98 -0
  88. accelforge/model/_looptree/latency/memory.py +120 -0
  89. accelforge/model/_looptree/latency/processors.py +92 -0
  90. accelforge/model/_looptree/mapping_utilities.py +71 -0
  91. accelforge/model/_looptree/reuse/__init__.py +4 -0
  92. accelforge/model/_looptree/reuse/isl/__init__.py +1 -0
  93. accelforge/model/_looptree/reuse/isl/des.py +59 -0
  94. accelforge/model/_looptree/reuse/isl/isl_functions.py +374 -0
  95. accelforge/model/_looptree/reuse/isl/mapping_to_isl/__init__.py +4 -0
  96. accelforge/model/_looptree/reuse/isl/mapping_to_isl/analyze_mapping.py +297 -0
  97. accelforge/model/_looptree/reuse/isl/mapping_to_isl/skews_from_mapping.py +236 -0
  98. accelforge/model/_looptree/reuse/isl/mapping_to_isl/tiling.py +685 -0
  99. accelforge/model/_looptree/reuse/isl/mapping_to_isl/types.py +188 -0
  100. accelforge/model/_looptree/reuse/isl/spatial.py +260 -0
  101. accelforge/model/_looptree/reuse/isl/temporal.py +182 -0
  102. accelforge/model/_looptree/reuse/symbolic/__init__.py +1 -0
  103. accelforge/model/_looptree/reuse/symbolic/symbolic copy 2.py +1346 -0
  104. accelforge/model/_looptree/reuse/symbolic/symbolic copy.py +1408 -0
  105. accelforge/model/_looptree/reuse/symbolic/symbolic.py +1396 -0
  106. accelforge/model/_looptree/run.py +122 -0
  107. accelforge/model/_looptree/types.py +26 -0
  108. accelforge/model/_looptree/visualization/__init__.py +0 -0
  109. accelforge/model/_looptree/visualization/occupancy.py +11 -0
  110. accelforge/model/main.py +222 -0
  111. accelforge/plotting/__init__.py +2 -0
  112. accelforge/plotting/mappings.py +219 -0
  113. accelforge/plotting/specs.py +57 -0
  114. accelforge/util/__init__.py +4 -0
  115. accelforge/util/_base_analysis_types.py +24 -0
  116. accelforge/util/_basetypes.py +1089 -0
  117. accelforge/util/_frozenset.py +36 -0
  118. accelforge/util/_isl.py +29 -0
  119. accelforge/util/_itertools.py +14 -0
  120. accelforge/util/_mathfuncs.py +57 -0
  121. accelforge/util/_parse_expressions.py +339 -0
  122. accelforge/util/_picklecache.py +32 -0
  123. accelforge/util/_setexpressions.py +268 -0
  124. accelforge/util/_sympy/__init__.py +0 -0
  125. accelforge/util/_sympy/broadcast_max.py +18 -0
  126. accelforge/util/_visualization.py +112 -0
  127. accelforge/util/_yaml.py +579 -0
  128. accelforge/util/parallel.py +193 -0
  129. accelforge-0.0.1.dist-info/METADATA +64 -0
  130. accelforge-0.0.1.dist-info/RECORD +258 -0
  131. accelforge-0.0.1.dist-info/WHEEL +5 -0
  132. accelforge-0.0.1.dist-info/licenses/LICENSE +19 -0
  133. accelforge-0.0.1.dist-info/top_level.txt +5 -0
  134. docs/_build/html/_sources/fastfusion.frontend.mapper.rst.txt +37 -0
  135. docs/_build/html/_sources/fastfusion.frontend.rst.txt +70 -0
  136. docs/_build/html/_sources/fastfusion.frontend.workload.rst.txt +21 -0
  137. docs/_build/html/_sources/fastfusion.mapper.FFM.rst.txt +37 -0
  138. docs/_build/html/_sources/fastfusion.mapper.rst.txt +18 -0
  139. docs/_build/html/_sources/fastfusion.rst.txt +20 -0
  140. docs/_build/html/_sources/fastfusion.util.rst.txt +21 -0
  141. docs/_build/html/_sources/index.rst.txt +87 -0
  142. docs/_build/html/_sources/modules.rst.txt +7 -0
  143. docs/_build/html/_sources/notes/citation.rst.txt +45 -0
  144. docs/_build/html/_sources/notes/definitions.rst.txt +43 -0
  145. docs/_build/html/_sources/notes/faqs.rst.txt +39 -0
  146. docs/_build/html/_sources/notes/modeling/accelerator_energy_latency.rst.txt +72 -0
  147. docs/_build/html/_sources/notes/modeling/component_energy_area.rst.txt +96 -0
  148. docs/_build/html/_sources/notes/modeling/mapping.rst.txt +100 -0
  149. docs/_build/html/_sources/notes/modeling.rst.txt +33 -0
  150. docs/_build/html/_sources/notes/parsing/arithmetic_parsing.rst.txt +136 -0
  151. docs/_build/html/_sources/notes/parsing/setexpressions.rst.txt +63 -0
  152. docs/_build/html/_sources/notes/parsing/yaml_parsing.rst.txt +176 -0
  153. docs/_build/html/_sources/notes/quickstart_and_installation.rst.txt +9 -0
  154. docs/_build/html/_sources/notes/spec/architecture.rst.txt +133 -0
  155. docs/_build/html/_sources/notes/spec/mapping.rst.txt +12 -0
  156. docs/_build/html/_sources/notes/spec/workload.rst.txt +83 -0
  157. docs/_build/html/_sources/notes/spec.rst.txt +36 -0
  158. docs/source/_ext/include_attrs.py +213 -0
  159. docs/source/_ext/include_docstring.py +364 -0
  160. docs/source/_ext/include_functions.py +154 -0
  161. docs/source/_ext/include_notebook.py +131 -0
  162. docs/source/_ext/include_yaml.py +119 -0
  163. docs/source/_ext/inherited_attributes.py +222 -0
  164. docs/source/_ext/paths.py +4 -0
  165. docs/source/conf.py +79 -0
  166. examples/arches/compute_in_memory/_include.yaml +74 -0
  167. examples/arches/compute_in_memory/_include_functions.py +229 -0
  168. examples/arches/compute_in_memory/_load_spec.py +57 -0
  169. examples/arches/compute_in_memory/components/c2c_multiplier.py +181 -0
  170. examples/arches/compute_in_memory/components/dac_c2c_r2r.py +605 -0
  171. examples/arches/compute_in_memory/components/misc.py +195 -0
  172. examples/arches/compute_in_memory/components/util/bit_functions.py +51 -0
  173. examples/arches/compute_in_memory/components/zero_comparator.py +92 -0
  174. examples/arches/compute_in_memory/isaac.yaml +233 -0
  175. examples/arches/compute_in_memory/memory_cells/ecram_demo.yaml +63 -0
  176. examples/arches/compute_in_memory/memory_cells/rram_example.yaml +63 -0
  177. examples/arches/compute_in_memory/memory_cells/rram_isaac_isca_2016.yaml +64 -0
  178. examples/arches/compute_in_memory/memory_cells/rram_neurosim_default.yaml +63 -0
  179. examples/arches/compute_in_memory/memory_cells/rram_raella_isca_2023.yaml +70 -0
  180. examples/arches/compute_in_memory/memory_cells/rram_wan_nature_2022.yaml +63 -0
  181. examples/arches/compute_in_memory/memory_cells/sram_colonnade_jssc_2021.yaml +63 -0
  182. examples/arches/compute_in_memory/memory_cells/sram_example.yaml +63 -0
  183. examples/arches/compute_in_memory/memory_cells/sram_jia_jssc_2020.yaml +63 -0
  184. examples/arches/compute_in_memory/memory_cells/sram_sinangil_jssc_2021.yaml +63 -0
  185. examples/arches/compute_in_memory/memory_cells/sram_wang_vlsi_2022.yaml +63 -0
  186. examples/arches/compute_in_memory/wang_vlsi_2022.yaml +289 -0
  187. examples/arches/eyeriss.yaml +68 -0
  188. examples/arches/fanout_variations/at_glb.yaml +31 -0
  189. examples/arches/fanout_variations/at_glb_with_fanout_node.yaml +34 -0
  190. examples/arches/fanout_variations/at_mac.yaml +31 -0
  191. examples/arches/fanout_variations/at_mac_with_constraints.yaml +38 -0
  192. examples/arches/fanout_variations/at_mac_with_fanout_node.yaml +34 -0
  193. examples/arches/nvdla.yaml +47 -0
  194. examples/arches/simple.yaml +28 -0
  195. examples/arches/tpu_v4i.yaml +67 -0
  196. examples/mappings/unfused_matmuls_to_simple.yaml +33 -0
  197. examples/misc/component_annotated.yaml +33 -0
  198. examples/workloads/gpt3_6.7B.yaml +124 -0
  199. examples/workloads/matmuls.yaml +20 -0
  200. examples/workloads/mobilenet_28.yaml +81 -0
  201. examples/workloads/mobilenet_various_separate.yaml +106 -0
  202. examples/workloads/three_matmuls_annotated.yaml +59 -0
  203. notebooks/.ipynb_checkpoints/fastfusion_arch_study_michael-checkpoint.ipynb +359 -0
  204. notebooks/compute_in_memory/_scripts.py +339 -0
  205. notebooks/compute_in_memory/isaac.guide.ipynb +270 -0
  206. notebooks/compute_in_memory/wang_vlsi_2022.ipynb +602 -0
  207. notebooks/paths.py +4 -0
  208. notebooks/tutorials/.ipynb_checkpoints/1_FFM-checkpoint.ipynb +3110 -0
  209. notebooks/tutorials/FFM.ipynb +3498 -0
  210. notebooks/tutorials/_include.py +48 -0
  211. notebooks/tutorials/component_energy_area.ipynb +363 -0
  212. tests/Q_mapping.yaml +38 -0
  213. tests/__init__.py +0 -0
  214. tests/conv.mapping.yaml +27 -0
  215. tests/conv.workload.yaml +13 -0
  216. tests/conv_sym.mapping.yaml +43 -0
  217. tests/copy.mapping.yaml +35 -0
  218. tests/copy.workload.yaml +15 -0
  219. tests/distribuffers/__init__.py +0 -0
  220. tests/distribuffers/multicast/test_cases.yaml +482 -0
  221. tests/distribuffers/spec/binding/valid_bindings.yaml +97 -0
  222. tests/distribuffers/spec/distributed.yaml +100 -0
  223. tests/distribuffers/spec/logical_arch.yaml +32 -0
  224. tests/distribuffers/spec/physical_arch.yaml +69 -0
  225. tests/distribuffers/test_binding.py +48 -0
  226. tests/frontend/__init__.py +0 -0
  227. tests/frontend/test_mapping_viz.py +52 -0
  228. tests/mapper/__init__.py +0 -0
  229. tests/mapper/configs/conv1d/conv1d.mapping.yaml +31 -0
  230. tests/mapper/configs/conv1d/conv1d.workload.yaml +11 -0
  231. tests/mapper/configs/two_conv1d/two_conv1d.expected.yaml +38 -0
  232. tests/mapper/configs/two_conv1d/two_conv1d.mapping.yaml +54 -0
  233. tests/mapper/configs/two_conv1d/two_conv1d.workload.yaml +19 -0
  234. tests/mapper/test_mapping_to_isl.py +90 -0
  235. tests/mapper/test_spatial_reuse_analysis.py +67 -0
  236. tests/mapper/test_temporal_reuse_analysis.py +56 -0
  237. tests/mapper/util.py +58 -0
  238. tests/matmul.mapping.yaml +29 -0
  239. tests/matmul.workload.yaml +12 -0
  240. tests/matmul_spatial.mapping.yaml +44 -0
  241. tests/mha.renames.yaml +65 -0
  242. tests/mha.workload.yaml +67 -0
  243. tests/mha.yaml +59 -0
  244. tests/mha_full.workload.yaml +67 -0
  245. tests/mobilenet.workload.yaml +35 -0
  246. tests/mobilenet_long.workload.yaml +64 -0
  247. tests/pmappingcache.py +24 -0
  248. tests/processing_stage.arch.yaml +40 -0
  249. tests/snowcat.arch.yaml +36 -0
  250. tests/test_ffm_join_pmappings.py +106 -0
  251. tests/test_ffm_make_pmappings.py +82 -0
  252. tests/test_ffm_make_tile_shapes.py +49 -0
  253. tests/test_mapper.py +100 -0
  254. tests/test_model.py +37 -0
  255. tests/test_plotting.py +72 -0
  256. tests/test_processing_stage.py +46 -0
  257. tests/test_symbolic_model.py +248 -0
  258. tests/test_workload.py +141 -0
@@ -0,0 +1,33 @@
1
+ How Modeling Works
2
+ ==================
3
+
4
+ .. _accelerator-modeling:
5
+
6
+ Modeling calculates the energy, area, and latency of an architecture running a given
7
+ workload. This is done in three steps:
8
+
9
+ 1. **Per-Component Energy, Area, and Leakage**: This step models the area and leakage
10
+ power of each :py:class:`~fastfusion.frontend.arch.Component` in the architecture.
11
+ It then generates *per-action energy*, which is used by later steps in the model to
12
+ find the energy of performing hardware
13
+ :py:class:`~fastfusion.frontend.arch.ComponentAction`\ s.
14
+
15
+ 2. **Mapping the Workload onto the Accelerator**: This step generates
16
+ :py:class:`~fastfusion.frontend.mapping.Mapping`\ s that map the workload onto the
17
+ hardware.
18
+
19
+ 3. **Modeling the Energy, Area, and Latency of the Mapping**: This step looks at the
20
+ full mapping and calculates the number of hardware actions that occur, using it to
21
+ total the energy and area of the accelerator.
22
+
23
+ In this package, the mapping and modeling steps are connected, letting the mapper
24
+ quickly find mappings that minimize the energy and latency of the accelerator.
25
+
26
+ These steps are detailed in the following sections:
27
+
28
+ .. toctree::
29
+ :maxdepth: 1
30
+
31
+ modeling/component_energy_area
32
+ modeling/accelerator_energy_latency
33
+ modeling/mapping
@@ -0,0 +1,136 @@
1
+ Arithmetic and Parsing
2
+ ======================
3
+
4
+ Objects can include expressions that are parsed when the
5
+ :py:class:`~fastfusion.frontend.spec.Spec` is parsed. Parsing occurs when the
6
+ :py:class:`~fastfusion.frontend.spec.Spec` is going to be used to model the energy, area,
7
+ or latency of an accelerator, such as when the
8
+ :py:func:`~fastfusion.frontend.spec.Spec.calculate_component_energy_area` method is
9
+ called.
10
+
11
+ To-be-parsed expressions can include Python code, and supported
12
+ operations include many standard library functions (*e.g.,* ``range``, ``min``) and
13
+ functions from the ``math`` standard library (*e.g.,* ``log2``, ``ceil``).
14
+
15
+ The scope available for parsing includes the following in order of increasing
16
+ precedence:
17
+
18
+ - Variables defined in a top-level :py:class:`~fastfusion.frontend.variables.Variables`
19
+ object.
20
+ - Variables defined in outer-level YAML objects. Dictionary keys can be referenced by
21
+ names, and list entries by index. The dot syntax can be used to access dictionaries;
22
+ for example, ``x.y.z`` is equivalent to ``outer_scope["x"]["y"]["z"]``.
23
+ - Variables defined in the current YAML object. Dictionary keys may reference each other
24
+ as long as references are not cyclic.
25
+
26
+ The following is an example of valid parsed data:
27
+
28
+ .. code-block:: yaml
29
+
30
+ variables:
31
+ a: 123
32
+ b: a + 5
33
+ c: min(b, 3)
34
+ d: sum(y for y in range(1, 10))
35
+
36
+ # In some later scope
37
+ ... outer_scope:
38
+ x: 123
39
+ y: a + x # Reference top-level variables
40
+ inner_scope:
41
+ a: 3 # Override outer scope
42
+ b: outer_scope.x
43
+ # Statements can be out-of-order if not cyclic referencing
44
+ first_item: second_item
45
+ second_item: 3
46
+
47
+ Additionally, values can be set directly in Python code. For example:
48
+
49
+ .. code-block:: python
50
+
51
+ from fastfusion.frontend.arch import ComponentAttributes
52
+ attributes = ComponentAttributes(
53
+ value1=123,
54
+ value2="value1 + 5"
55
+ # ... other attributes
56
+ )
57
+
58
+
59
+ Supported Arithmetic Operations
60
+ -------------------------------
61
+
62
+ The following are available expressions. In addition to the below, Python built-ins that
63
+ are available without import (*e.g.,* ``min``) are also available.
64
+
65
+ - ``ceil``: :py:func:`math.ceil`
66
+ - ``comb``: :py:func:`math.comb`
67
+ - ``copysign``: :py:func:`math.copysign`
68
+ - ``fabs``: :py:func:`math.fabs`
69
+ - ``factorial``: :py:func:`math.factorial`
70
+ - ``floor``: :py:func:`math.floor`
71
+ - ``fmod``: :py:func:`math.fmod`
72
+ - ``frexp``: :py:func:`math.frexp`
73
+ - ``fsum``: :py:func:`math.fsum`
74
+ - ``gcd``: :py:func:`math.gcd`
75
+ - ``isclose``: :py:func:`math.isclose`
76
+ - ``isfinite``: :py:func:`math.isfinite`
77
+ - ``isinf``: :py:func:`math.isinf`
78
+ - ``isnan``: :py:func:`math.isnan`
79
+ - ``isqrt``: :py:func:`math.isqrt`
80
+ - ``ldexp``: :py:func:`math.ldexp`
81
+ - ``modf``: :py:func:`math.modf`
82
+ - ``perm``: :py:func:`math.perm`
83
+ - ``prod``: :py:func:`math.prod`
84
+ - ``remainder``: :py:func:`math.remainder`
85
+ - ``trunc``: :py:func:`math.trunc`
86
+ - ``exp``: :py:func:`math.exp`
87
+ - ``expm1``: :py:func:`math.expm1`
88
+ - ``log``: :py:func:`math.log`
89
+ - ``log1p``: :py:func:`math.log1p`
90
+ - ``log2``: :py:func:`math.log2`
91
+ - ``log10``: :py:func:`math.log10`
92
+ - ``pow``: :py:func:`math.pow`
93
+ - ``sqrt``: :py:func:`math.sqrt`
94
+ - ``acos``: :py:func:`math.acos`
95
+ - ``asin``: :py:func:`math.asin`
96
+ - ``atan``: :py:func:`math.atan`
97
+ - ``atan2``: :py:func:`math.atan2`
98
+ - ``cos``: :py:func:`math.cos`
99
+ - ``dist``: :py:func:`math.dist`
100
+ - ``hypot``: :py:func:`math.hypot`
101
+ - ``sin``: :py:func:`math.sin`
102
+ - ``tan``: :py:func:`math.tan`
103
+ - ``degrees``: :py:func:`math.degrees`
104
+ - ``radians``: :py:func:`math.radians`
105
+ - ``acosh``: :py:func:`math.acosh`
106
+ - ``asinh``: :py:func:`math.asinh`
107
+ - ``atanh``: :py:func:`math.atanh`
108
+ - ``cosh``: :py:func:`math.cosh`
109
+ - ``sinh``: :py:func:`math.sinh`
110
+ - ``tanh``: :py:func:`math.tanh`
111
+ - ``erf``: :py:func:`math.erf`
112
+ - ``erfc``: :py:func:`math.erfc`
113
+ - ``gamma``: :py:func:`math.gamma`
114
+ - ``lgamma``: :py:func:`math.lgamma`
115
+ - ``pi``: :py:data:`math.pi`
116
+ - ``e``: :py:data:`math.e`
117
+ - ``tau``: :py:data:`math.tau`
118
+ - ``inf``: :py:data:`math.inf`
119
+ - ``nan``: :py:data:`math.nan`
120
+ - ``abs``: :py:func:`abs`
121
+ - ``round``: :py:func:`round`
122
+ - ``pow``: :py:func:`pow`
123
+ - ``sum``: :py:func:`sum`
124
+ - ``range``: :py:func:`range`
125
+ - ``len``: :py:func:`len`
126
+ - ``min``: :py:func:`min`
127
+ - ``max``: :py:func:`max`
128
+ - ``float``: :py:func:`float`
129
+ - ``int``: :py:func:`int`
130
+ - ``str``: :py:func:`str`
131
+ - ``bool``: :py:func:`bool`
132
+ - ``list``: :py:func:`list`
133
+ - ``tuple``: :py:func:`tuple`
134
+ - ``enumerate``: :py:func:`enumerate`
135
+ - ``getcwd``: :py:func:`os.getcwd`
136
+ - ``map``: :py:func:`map`
@@ -0,0 +1,63 @@
1
+ .. _set-expressions:
2
+
3
+ Set Expressions
4
+ ===============
5
+
6
+ Set expressions are used to describe sets of tensors and rank variables. Set expressions
7
+ are parsed for each pmapping template, meaning that they can reference specific tensors
8
+ for each Einsum.
9
+
10
+ As an example of a set expression, we can describe all tensors that are not intermediates
11
+ using the following:
12
+
13
+ .. code-block:: yaml
14
+
15
+ ~Intermediates
16
+
17
+ Set expressions can use the full Python syntax, including the following:
18
+
19
+ - ``&``: Intersection
20
+ - ``|``: Union
21
+ - ``~``: Complement
22
+ - ``-``: Difference
23
+
24
+ You may also use Pythonic language with set expressions in some locations. For example,
25
+ we may want to use only the input tensors if there are more than three total tensors, and all tensors otherwise:
26
+
27
+ .. code-block:: yaml
28
+
29
+ Inputs if len(All) > 3 else All
30
+
31
+ Set expressions are parsed for every Einsum + Flattened-Architecture (see :ref:`flattening`)
32
+ combination. The following set expressions are supported:
33
+
34
+ - ``All``: All tensors used in the current Einsum.
35
+ - ``Inputs``: Tensors input to the current Einsum.
36
+ - ``Intermediates``: Tensors produced by one Einsum and consumed by another.
37
+ - ``Nothing``: The empty set.
38
+ - ``Outputs``: Tensors output from the current Einsum.
39
+ - ``Persistent``: Tensors that must remain in backing storage for the full duration of
40
+ the workload's execution. See :ref:`persistent-tensors`.
41
+ - ``Shared``: Tensors that are shared between multiple Einsums.
42
+ - ``Tensors``: Alias for ``All``.
43
+
44
+ Additionally, the following special variables are available:
45
+
46
+ - ``<Any Tensor Name>``: Resolves to the tensor with the given name. If the tensor is
47
+ not used in the current Einsum, then it resolves to the empty set.
48
+ - ``Einsum``: The name of the currently-processed Einsum. May be used in expressions
49
+ such as ``Inputs if Einsum == "Conv" else All``.
50
+ - ``EinsumObject``: For complex logic using the Einsum object directly.
51
+ - ``MemoryObject.Tensors``: The set of all tensors that are stored in the memory object.
52
+ Architectures are parsed from the top down, so this will only be available after
53
+ ``MemoryObject`` has been parsed. Lower-level memory objects may reference upper-level
54
+ memory objects, but not vice versa.
55
+
56
+ All tensor expressions can be converted into relevant rank variables by accessing
57
+ ``.rank_variables``, which will return the set of all rank variables that index into the
58
+ tensor. If multiple tensors are referenced, then the union of all indexing rank
59
+ variables is returned. For example, ``MemoryObject.Tensors.rank_variables`` will return
60
+ the set of all rank variables that index into any of the tensors stored in
61
+ ``MemoryObject``.
62
+
63
+ Additional keys can be defined following :ref:`renaming-tensors-rank-variables`.
@@ -0,0 +1,176 @@
1
+ YAML Parsing
2
+ ============
3
+
4
+ FastFusion inputs can be parsed from YAML files. YAML parsing occurs once when YAML
5
+ files are loaded into Python.
6
+
7
+ We use an extended version of the standard YAML syntax, including the ``<<`` and ``<<<``
8
+ operators. ``<<``, when used as a dictionary key, will merge the contents of its value
9
+ with the current dictionary. ``<<<`` will merge the contents of its value and will merge
10
+ nested dictionaries. The ``!nomerge`` tag will block merging from occurring.
11
+
12
+ The following is a YAML parsing cheat sheet:
13
+
14
+ .. code-block:: yaml
15
+
16
+ # YAML Nodes
17
+ listNode:
18
+ - element1
19
+ - element2
20
+
21
+ dict_node:
22
+ key1: value1
23
+ key2: value2
24
+
25
+ # Styles
26
+ list_block_style:
27
+ - element1
28
+ - element2
29
+ list_flow_style: [element1, element2]
30
+
31
+ dict_block_style:
32
+ key1: value1
33
+ key2: value2
34
+ dict_flow_style: {key1: value1, key2: value2}
35
+
36
+ # Anchors, Aliases, and Merge Keys
37
+
38
+ # Anchors
39
+ anchored_list_block_style: &my_anchored_list
40
+ - element1
41
+ - element2
42
+ anchored_list_flow_style: &my_anchored_list [1, 2, 3, 4, 5]
43
+
44
+ anchored_dict_block_style: &my_anchored_dict
45
+ key1: value1
46
+ key2: value2
47
+ anchored_dict_flow_style: &my_anchored_dict {key1: value1, key2: value2}
48
+
49
+ # Aliases
50
+ my_list_alias: *my_anchored_list
51
+ result_of_my_list_alias: [1, 2, 3, 4, 5]
52
+
53
+ my_dict_alias: *my_anchored_dict
54
+ result_of_my_dict_alias: {key1: value1, key2: value2}
55
+
56
+ # Merge Keys
57
+ anchored_dict_1: &my_anchored_dict
58
+ key1: value1_dict1
59
+ key2: value2_dict1
60
+
61
+ anchored_dict_2: &my_anchored_dict2
62
+ key2: value2_dict2
63
+ key3: value3_dict2
64
+
65
+ merged_dict:
66
+ <<: [*my_anchored_dict, *my_anchored_dict2] # My_anchored_dict takes precedence
67
+
68
+ result_of_merged_dict:
69
+ key1: value1_dict1
70
+ key2: value2_dict1 # Earlier anchors take precedence
71
+ key3: value3_dict2
72
+
73
+ merged_dict2:
74
+ <<: *my_anchored_dict
75
+ key2: override_value2 # Override key2
76
+
77
+ result_of_merged_dict2:
78
+ key1: value1_dict1
79
+ key2: override_value2
80
+
81
+ # Hierarchical Merge Keys
82
+ anchored_dict_hierarchical_1: &my_anchored_dict
83
+ key1: value1_dict1
84
+ key2: {subkey1: subvalue1, subkey2: subvalue2}
85
+ mylist: [d, e, f]
86
+ mylist_nomerge: [4, 5, 6]
87
+
88
+ merged_dict_hierarchical:
89
+ <<<: *my_anchored_dict
90
+ key2: {subkey1: override1} # subkey2: subvalue2 will come from the merge
91
+ mylist: [a, b, c]
92
+ mylist_nomerge: !nomerge [1, 2, 3]
93
+
94
+ result_of_merged_dict_hierarchical:
95
+ key1: value1_dict1
96
+ key2: {subkey1: override1, subkey2: subvalue2}
97
+ mylist: [a, b, c, d, e, f]
98
+ mylist_nomerge: [1, 2, 3]
99
+
100
+ merged_dict_non_hierarchical:
101
+ <<: *my_anchored_dict
102
+ key2: {subkey1: override1} # This will override all of key2
103
+ mylist: [a, b, c]
104
+ mylist_nomerge: !nomerge [1, 2, 3]
105
+
106
+ result_of_merged_dict_non_hierarchical:
107
+ key1: value1_dict1
108
+ key2: {subkey1: override1}
109
+ mylist: [a, b, c]
110
+ mylist_nomerge: [1, 2, 3]
111
+
112
+
113
+
114
+ Jinja2 Templating
115
+ -----------------
116
+
117
+ We also support Jinja2 templating. To substitute Jinja2 variables, the
118
+ ``jinja_parse_data`` argument can be passed to the
119
+ :py:meth:`~fastfusion.util.basetypes.FromYAMLAble.from_yaml` function. Additional Jinja2
120
+ functions are also supported, including:
121
+
122
+ - ``add_to_path(path)``: Add a path to the search path for the ``include`` function.
123
+
124
+ - ``cwd()``: Return the current working directory.
125
+
126
+ - ``find_path(path)``: Find a file in the search path and return the path to the file.
127
+
128
+ - ``include(path, key)``: Include a file and return the value of the key. For example,
129
+ ``include(path/x.yaml, a)`` will open the file ``path/x.yaml``, look for a top-level
130
+ dictionary, and return the ``a`` key from that dictionary. Multiple levels of indexing
131
+ can be used, such as ``include(path/x.yaml, a.b.c)``.
132
+
133
+ - ``include_all(path, key)``: Include all files in a directory and return the value of the
134
+ key. For example, ``include_all(path/dir, a)`` will open all files in the directory
135
+ ``path/dir``, look for a top-level dictionary, and return the ``a`` key from that dictionary.
136
+
137
+ - ``include_text(path)``: Include a file and return the text of the file.
138
+
139
+ - ``path_exists(path)``: Check if a file exists in the search path.
140
+
141
+ The following is a Jinja2 template cheat sheet:
142
+
143
+ .. code-block:: yaml
144
+
145
+ # Add files to be included in the environment
146
+ {{add_to_path('path/to/some/dir')}}
147
+ {{add_to_path('path/to/some/other/dir')}}
148
+
149
+ variables:
150
+ var1: 5
151
+ var3: "{{cwd()}}/some_file.yaml" # {{cwd()}} is the directory of this file
152
+ var4: "{{find_path('some_file.yaml')}}" # find_path searches all paths added by add_to_path
153
+ var5: {{set_by_jinja}} # Sets the value to a "set_by_jinja" variable that must be defined
154
+
155
+ {% if path_exists('some_file.yaml') %} # Check if a file exists
156
+ var6: "some_file.yaml exists" # Include this line if the file exists
157
+ {% endif %}
158
+
159
+ arch:
160
+ # Include a subset of the file. Index into the structure with
161
+ # dot-separated keys.
162
+ nodes: {{include('other.arch.yaml', 'arch.nodes')}}
163
+
164
+ # Include the entire file
165
+ {{include_text('grab_text_from_file.yaml')}}
166
+
167
+ compound_components:
168
+ # Include the subsets of multiple files. They will be merged into one list.
169
+ classes: {{include_all('compound_components/*.yaml', 'compound_components.classes')}}
170
+
171
+
172
+ {% if enable_text_flag|default(False) %}
173
+ text_included_if_enable_text_flag_is_true: |
174
+ This text will be included if enable_text_flag is true. The |default(False) sets
175
+ the default value of enable_text_flag to False if it is not set.
176
+ {% endif %}
@@ -0,0 +1,9 @@
1
+ Installation
2
+ ============
3
+
4
+ FastFusion can be installed with pip:
5
+
6
+ .. code-block:: bash
7
+
8
+    pip install fastfusion
9
+
@@ -0,0 +1,133 @@
1
+ Arch Specification
2
+ ==================
3
+
4
+ The architecture, defined by the :py:class:`~fastfusion.frontend.arch.Arch` class,
5
+ describes the hardware that is running the workload. An architecture is represented as a
6
+ tree, where branches in the tree represent different compute paths that may be taken.
7
+ For the rest of this section, we will assume that the architecture has been *flattened*,
8
+ meaning that there are no branches in the tree. The flattening procedure is described in
9
+ :ref:`flattening`.
10
+
11
+ A flattened architecture is a hierarchy of components with a
12
+ :py:class:`~fastfusion.frontend.arch.Compute` at the bottom. The following components
13
+ are supported:
14
+
15
+ - :py:class:`~fastfusion.frontend.arch.Memory` components store and reuse data.
16
+ - :py:class:`~fastfusion.frontend.arch.ProcessingStage` components perform some
17
+ non-compute action (*e.g.,* quantizing or transferring data).
18
+ - :py:class:`~fastfusion.frontend.arch.Compute` components perform the Einsum's
19
+ computation.
20
+
21
+ In the architecture file, each component is represented by a YAML dictionary. Component
22
+ types are preceded by the ``!`` character. An example architecture is shown below:
23
+
24
+ .. include:: ../../../../examples/arches/tpu_v4i_like.arch.yaml
25
+ :code: yaml
26
+
27
+
28
+ Flattening
29
+ ----------
30
+
31
+ A given Einsum may be executed only on a single
32
+ :py:class:`~fastfusion.frontend.arch.Compute`, and it may use hardware objects between
33
+ the root of the tree and the leaf for that
34
+ :py:class:`~fastfusion.frontend.arch.Compute`. Flattening an architecture converts a
35
+ tree architecture into multiple parallel *Flattened-Architectures*, each one
36
+ representing one possible path from the root of the tree to the leaf for that
37
+ :py:class:`~fastfusion.frontend.arch.Compute`.
38
+
39
+ For example, in the architecture above, there are two compute units, the ``scalar_unit``
40
+ and the ``mac``. Flattening this architecture will produce two Flattened-Architectures;
41
+ one with a ``scalar_unit`` and one with a ``mac``. The partial mappings for each of
42
+ these architectures can be combined, and can share hardware that exists above both
43
+ compute units.
44
+
45
+ Inserting a :py:class:`~fastfusion.frontend.arch.Compute` directly into the top-level
46
+ architecture hierarchy will create an optional compute path that goes from the top node
47
+ to the compute. More complex topologies (*e.g.,* giving an upper-level compute a private
48
+ cache) can be created by creating sub-branches following :ref:`sub-branches`.
49
+
50
+
51
+ Sub-Branches
52
+ ------------
53
+
54
+ .. _sub-branches:
55
+
56
+ Sub-branches in the architecture can represent different execution paths. The following
57
+ branch types are supported:
58
+
59
+ - :py:class:`~fastfusion.frontend.arch.Parallel` represents multiple parallel branches,
60
+ one of which is executed.
61
+ - :py:class:`~fastfusion.frontend.arch.Hierarchical` represents a single hierarchy,
62
+ where each node is a parent of the following nodes.
63
+
64
+ Sub-branches are written with the following syntax:
65
+
66
+ .. code-block:: yaml
67
+
68
+ - !Memory
69
+ ...
70
+
71
+ - !Memory
72
+ ...
73
+
74
+ - !Parallel
75
+ nodes:
76
+ - !Hierarchical
77
+ nodes:
78
+ - ... # First-branch nodes
79
+ - !Hierarchical
80
+ nodes:
81
+ - ... # Second-branch nodes
82
+
83
+ # If more nodes go down here, they are children of the outer-level node, not the
84
+ # !Parallel node.
85
+ - !Memory
86
+ ...
87
+
88
+ The top-level :py:class:`~fastfusion.frontend.arch.Arch` is a
89
+ :py:class:`~fastfusion.frontend.arch.Hierarchical`.
90
+
91
+
92
+ Spatial Fanouts
93
+ ---------------
94
+
95
+ Spatial fanouts describe the spatial organization of components in the architecture. Any
96
+ component may have spatial fanouts, and fanouts are allowed in any dimension. For
97
+ example, in the architecture above, the ``LocalBuffer`` component has a size-4 spatial
98
+ fanout in the ``Z`` dimension, meaning that there are 4 instances of the component. All
99
+ child components are duplicated in the ``Z`` dimension as well.
100
+
101
+ The ``ArrayFanout`` component also has a spatial fanout in two dimensions, the
102
+ ``reuse_input`` and ``reuse_output`` dimensions.
103
+ :py:class:`~fastfusion.frontend.arch.Fanout` components can be used to instantiate
104
+ spatial fanouts.
105
+
106
+ Reuse in spatial dimensions may be controlled with the ``may_reuse`` keyword, which
107
+ takes in a set expression that is parsed according to :ref:`set-expressions`. In the
108
+ example, nothing is reused spatially between ``LocalBuffer`` instances, while inputs
109
+ and outputs are reused across registers in the ``reuse_input`` and ``reuse_output``
110
+ dimensions, respectively. Additionally, the ``must_reuse`` keyword can be used to force
111
+ reuse; for example, ``must_reuse: input`` means that all spatial instances must use the
112
+ same input values, else the mapping will be invalid.
113
+
114
+ Spatial fanouts support the following keywords:
115
+
116
+ .. include-attrs:: fastfusion.frontend.arch.Spatial
117
+
118
+ Tensor Holders
119
+ --------------
120
+
121
+ Tensor holders, which include :py:class:`~fastfusion.frontend.arch.Memory` and
122
+ :py:class:`~fastfusion.frontend.arch.Fanout` components, hold tensors. Each of them
123
+ supports extra attributes in its ``attributes`` field, so check
124
+ :py:class:`~fastfusion.frontend.arch.MemoryAttributes` and
125
+ :py:class:`~fastfusion.frontend.arch.FanoutAttributes` for more information on the
126
+ attributes that they support.
127
+
128
+ Additionally, they have a ``tensors`` field, which is used to define the
129
+ tensors that are held by the component. They are represented by the
130
+ :py:class:`~fastfusion.frontend.constraints.Tensors` class, which supports the following
131
+ fields:
132
+
133
+ .. include-attrs:: fastfusion.frontend.constraints.Tensors
@@ -0,0 +1,12 @@
1
+ .. _specifying-mapping:
2
+
3
+ Mapping Specification
4
+ =====================
5
+
6
+
7
+
8
+ What is a LoopTree
9
+ ------------------
10
+
11
+ Reading LoopTrees
12
+ -----------------
@@ -0,0 +1,83 @@
1
+ .. _specifying-workload:
2
+
3
+ Workload and Renames Specification
4
+ ==================================
5
+
6
+ The :py:class:`~fastfusion.frontend.workload` object describes a cascade of
7
+ Einsums. An Einsum can represent a variety of tensor algebra kernels,
8
+ and a cascade of Einsums is a list of Einsums with data dependencies.
9
+
10
+ The following is an example workload for three back-to-back matrix multiplications:
11
+
12
+ .. include:: ../../../../examples/workloads/three_matmuls.workload.yaml
13
+ :code: yaml
14
+
15
+ The top-level Workload spec has the following attributes:
16
+
17
+ .. include-attrs:: fastfusion.frontend.workload
18
+
19
+ Each Einsum in the workload has the following attributes:
20
+
21
+ .. include-attrs:: fastfusion.frontend.workload.Einsum
22
+
23
+ And each tensor access has the following attributes:
24
+
25
+ .. include-attrs:: fastfusion.frontend.workload.TensorAccess
26
+
27
+ Workloads include *ranks* and *rank variables*. Ranks are the dimensions of the tensors
28
+ in the Einsum, while rank variables are variables that index into these ranks. Generally
29
+ the rank names are uppercased versions of the rank variable names, but not always. In
30
+ more-complex workloads (such as the GPT example later in this doc), there may be cases
31
+ where we index into a rank with multiple different rank variables; in this case, we may
32
+ use a projection dictionary instead of a list.
33
+
34
+ .. code-block:: yaml
35
+
36
+ - name: Matmul0
37
+ tensor_accesses:
38
+ - {name: T0, projection: [m, n0]} # Implies projection: {M: m, N0: n0}
39
+ - {name: W0, projection: [n0, n1]} # Implies projection: {N0: n0, N1: n1}
40
+ - {name: T1, projection: [m, n1], output: True} # Implies projection: {M: m, N1: n1}
41
+
42
+ - name: Matmul1
43
+ tensor_accesses:
44
+ # We can be explicit about the projection
45
+ - {name: T1, projection: {M: m, N1: n1}}
46
+ - {name: W1, projection: {N1: n1, N2: n2}}
47
+ - {name: T2, projection: {M: m, N2: n2}, output: True}
48
+
49
+ Renaming Tensors and Rank Variables
50
+ -----------------------------------
51
+ .. _renaming-tensors-rank-variables:
52
+
53
+ Renames allow us to write simple, generic names (*e.g.,* ``input``,
54
+ ``reduced_rank_variable``) in our set expressions and have them resolve to tensors or
55
+ rank variables in the Einsum.
56
+
57
+ Each Einsum object has a ``renames`` attribute. This attribute may be populated with one
58
+ of the following:
59
+
60
+ - A dictionary of ``{new_name: source_set_expression}`` expressions, where
61
+ ``source_set_expression`` may resolve either to tensors or rank variables. This is the
62
+ simplest method.
63
+ - A list of dictionaries, each one having the structure ``{name: new_name, source:
64
+ source_set_expression, expected_count: 1}``. This method allows you to write an
65
+ expected count, which is optional, and checks that your set expression returned the
66
+ expected number of elements. For example, if your source set expression were
67
+ ``Outputs()``, an expected count of 1 would pass if there were only one output tensor,
68
+ but fail if there were two.
69
+
70
+
71
+ Additionally, you may define a separate top-level
72
+ :py:class:`~fastfusion.frontend.renames.Renames` object with structure mirroring the
73
+ workload. For example, one is at the bottom of the following workload:
74
+
75
+ .. include:: ../../../../examples/workloads/gpt3_6.7B.workload.yaml
76
+ :code: yaml
77
+
78
+ This renames format includes, for every Einsum, a ``tensor_accesses`` key and a
79
+ ``rank_variables`` key. Both support the above dictionary or list-of-dictionary rename
80
+ formats.
81
+
82
+ If an Einsum in the renames is named ``default``, then its renames are applied to every
83
+ Einsum unless overridden.
@@ -0,0 +1,36 @@
1
+ Input Specifications
2
+ ====================
3
+
4
+ The :py:class:`~fastfusion.frontend.spec.Spec` class is the main class that contains all
5
+ inputs to this framework. It includes the following:
6
+
7
+ .. include-attrs:: fastfusion.frontend.spec.Spec
8
+
9
+ Some of the Spec's inputs are described in the following sections:
10
+
11
+ .. toctree::
12
+ :maxdepth: 1
13
+
14
+ spec/architecture
15
+ spec/mapping
16
+ spec/workload
17
+
18
+ Input Parsing
19
+ -------------
20
+
21
+ Input specifications can include arithmetic expressions and set expressions. The parsing
22
+ is described in the following:
23
+
24
+ .. toctree::
25
+ :maxdepth: 1
26
+
27
+ parsing/arithmetic_parsing
28
+ parsing/setexpressions
29
+
30
+ Additionally, inputs can be specified with YAML files using an extended YAML syntax, which
31
+ is described in the following:
32
+
33
+ .. toctree::
34
+ :maxdepth: 1
35
+
36
+ parsing/yaml_parsing