accelforge 0.0.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- accelforge/__init__.py +21 -0
- accelforge/_accelerated_imports.py +16 -0
- accelforge/_deprecate/_simanneal/evalmapping.py +271 -0
- accelforge/_deprecate/_simanneal/mapspaceglobals.py +298 -0
- accelforge/_deprecate/_simanneal/simanneal.py +666 -0
- accelforge/_deprecate/_simanneal/tracking.py +105 -0
- accelforge/_deprecate/_simanneal/wrappers.py +218 -0
- accelforge/_deprecate/_simanneal2/__init__.py +7 -0
- accelforge/_deprecate/_simanneal2/simanneal.py +493 -0
- accelforge/_deprecate/_simanneal2/tracking.py +116 -0
- accelforge/_deprecate/compatibility_util.py +181 -0
- accelforge/_deprecate/layerdeduplication/__init__.py +2 -0
- accelforge/_deprecate/layerdeduplication/group_similar_einsums.py +160 -0
- accelforge/_deprecate/layerdeduplication/grouped_einsums.py +84 -0
- accelforge/_deprecate/mapping_filter_tags/__init__.py +2 -0
- accelforge/_deprecate/mapping_filter_tags/ffmt.py +212 -0
- accelforge/_deprecate/mapping_filter_tags/onesplit.py +24 -0
- accelforge/_deprecate/mapping_filter_tags/util.py +24 -0
- accelforge/_deprecate/tags.py +69 -0
- accelforge/_deprecate/viz/__init__.py +0 -0
- accelforge/_deprecate/viz/interactive.py +159 -0
- accelforge/_deprecate/viz/reservationtree.py +307 -0
- accelforge/_deprecate/viz/ski_slope.py +88 -0
- accelforge/_version.py +15 -0
- accelforge/examples.py +39 -0
- accelforge/frontend/__init__.py +10 -0
- accelforge/frontend/_binding.py +129 -0
- accelforge/frontend/_workload_isl/__init__.py +2 -0
- accelforge/frontend/_workload_isl/_isl.py +149 -0
- accelforge/frontend/_workload_isl/_symbolic.py +141 -0
- accelforge/frontend/arch copy.py +1544 -0
- accelforge/frontend/arch.py +1642 -0
- accelforge/frontend/config.py +63 -0
- accelforge/frontend/mapper/__init__.py +5 -0
- accelforge/frontend/mapper/ffm.py +126 -0
- accelforge/frontend/mapper/mapper.py +7 -0
- accelforge/frontend/mapper/metrics.py +30 -0
- accelforge/frontend/mapping/__init__.py +1 -0
- accelforge/frontend/mapping/mapping.py +1736 -0
- accelforge/frontend/model.py +14 -0
- accelforge/frontend/renames.py +150 -0
- accelforge/frontend/spec copy.py +230 -0
- accelforge/frontend/spec.py +301 -0
- accelforge/frontend/variables.py +12 -0
- accelforge/frontend/workload.py +952 -0
- accelforge/mapper/FFM/__init__.py +9 -0
- accelforge/mapper/FFM/_join_pmappings/__init__.py +0 -0
- accelforge/mapper/FFM/_join_pmappings/compatibility.py +653 -0
- accelforge/mapper/FFM/_join_pmappings/compress_pmappings.py +140 -0
- accelforge/mapper/FFM/_join_pmappings/join_pmappings.py +703 -0
- accelforge/mapper/FFM/_join_pmappings/pmapping_dataframe.py +901 -0
- accelforge/mapper/FFM/_join_pmappings/pmapping_group.py +337 -0
- accelforge/mapper/FFM/_make_pmappings/contraints/__init__.py +0 -0
- accelforge/mapper/FFM/_make_pmappings/contraints/constraints.py +360 -0
- accelforge/mapper/FFM/_make_pmappings/make_pmapping_templates/__init__.py +1 -0
- accelforge/mapper/FFM/_make_pmappings/make_pmapping_templates/make_loops.py +373 -0
- accelforge/mapper/FFM/_make_pmappings/make_pmapping_templates/make_pmapping_templates.py +463 -0
- accelforge/mapper/FFM/_make_pmappings/make_pmapping_templates/make_reservations.py +95 -0
- accelforge/mapper/FFM/_make_pmappings/make_pmapping_templates/make_storage_order.py +382 -0
- accelforge/mapper/FFM/_make_pmappings/make_pmapping_templates/make_storages.py +155 -0
- accelforge/mapper/FFM/_make_pmappings/make_pmappings.py +411 -0
- accelforge/mapper/FFM/_make_pmappings/make_pmappings_from_templates/__init__.py +1 -0
- accelforge/mapper/FFM/_make_pmappings/make_pmappings_from_templates/make_pmappings_from_templates.py +407 -0
- accelforge/mapper/FFM/_make_pmappings/make_pmappings_from_templates/make_tile_shapes.py +1681 -0
- accelforge/mapper/FFM/_make_pmappings/make_pmappings_from_templates/run_model.py +170 -0
- accelforge/mapper/FFM/_make_pmappings/make_pmappings_from_templates/symbol_relations.py +174 -0
- accelforge/mapper/FFM/_make_pmappings/pmapper_job.py +282 -0
- accelforge/mapper/FFM/_pareto_df/df_convention.py +273 -0
- accelforge/mapper/FFM/_pareto_df/pareto copy.py +836 -0
- accelforge/mapper/FFM/_pareto_df/pareto.py +508 -0
- accelforge/mapper/FFM/data.py +61 -0
- accelforge/mapper/FFM/main copy.py +236 -0
- accelforge/mapper/FFM/main.py +208 -0
- accelforge/mapper/FFM/mappings.py +510 -0
- accelforge/mapper/FFM/pmappings.py +310 -0
- accelforge/mapper/__init__.py +4 -0
- accelforge/mapper.py +0 -0
- accelforge/model/__init__.py +1 -0
- accelforge/model/_looptree/__init__.py +0 -0
- accelforge/model/_looptree/accesses.py +335 -0
- accelforge/model/_looptree/capacity/__init__.py +1 -0
- accelforge/model/_looptree/capacity/aggregators.py +36 -0
- accelforge/model/_looptree/capacity/capacity.py +47 -0
- accelforge/model/_looptree/energy.py +150 -0
- accelforge/model/_looptree/equivalent_ranks.py +29 -0
- accelforge/model/_looptree/latency/__init__.py +1 -0
- accelforge/model/_looptree/latency/latency.py +98 -0
- accelforge/model/_looptree/latency/memory.py +120 -0
- accelforge/model/_looptree/latency/processors.py +92 -0
- accelforge/model/_looptree/mapping_utilities.py +71 -0
- accelforge/model/_looptree/reuse/__init__.py +4 -0
- accelforge/model/_looptree/reuse/isl/__init__.py +1 -0
- accelforge/model/_looptree/reuse/isl/des.py +59 -0
- accelforge/model/_looptree/reuse/isl/isl_functions.py +374 -0
- accelforge/model/_looptree/reuse/isl/mapping_to_isl/__init__.py +4 -0
- accelforge/model/_looptree/reuse/isl/mapping_to_isl/analyze_mapping.py +297 -0
- accelforge/model/_looptree/reuse/isl/mapping_to_isl/skews_from_mapping.py +236 -0
- accelforge/model/_looptree/reuse/isl/mapping_to_isl/tiling.py +685 -0
- accelforge/model/_looptree/reuse/isl/mapping_to_isl/types.py +188 -0
- accelforge/model/_looptree/reuse/isl/spatial.py +260 -0
- accelforge/model/_looptree/reuse/isl/temporal.py +182 -0
- accelforge/model/_looptree/reuse/symbolic/__init__.py +1 -0
- accelforge/model/_looptree/reuse/symbolic/symbolic copy 2.py +1346 -0
- accelforge/model/_looptree/reuse/symbolic/symbolic copy.py +1408 -0
- accelforge/model/_looptree/reuse/symbolic/symbolic.py +1396 -0
- accelforge/model/_looptree/run.py +122 -0
- accelforge/model/_looptree/types.py +26 -0
- accelforge/model/_looptree/visualization/__init__.py +0 -0
- accelforge/model/_looptree/visualization/occupancy.py +11 -0
- accelforge/model/main.py +222 -0
- accelforge/plotting/__init__.py +2 -0
- accelforge/plotting/mappings.py +219 -0
- accelforge/plotting/specs.py +57 -0
- accelforge/util/__init__.py +4 -0
- accelforge/util/_base_analysis_types.py +24 -0
- accelforge/util/_basetypes.py +1089 -0
- accelforge/util/_frozenset.py +36 -0
- accelforge/util/_isl.py +29 -0
- accelforge/util/_itertools.py +14 -0
- accelforge/util/_mathfuncs.py +57 -0
- accelforge/util/_parse_expressions.py +339 -0
- accelforge/util/_picklecache.py +32 -0
- accelforge/util/_setexpressions.py +268 -0
- accelforge/util/_sympy/__init__.py +0 -0
- accelforge/util/_sympy/broadcast_max.py +18 -0
- accelforge/util/_visualization.py +112 -0
- accelforge/util/_yaml.py +579 -0
- accelforge/util/parallel.py +193 -0
- accelforge-0.0.1.dist-info/METADATA +64 -0
- accelforge-0.0.1.dist-info/RECORD +258 -0
- accelforge-0.0.1.dist-info/WHEEL +5 -0
- accelforge-0.0.1.dist-info/licenses/LICENSE +19 -0
- accelforge-0.0.1.dist-info/top_level.txt +5 -0
- docs/_build/html/_sources/fastfusion.frontend.mapper.rst.txt +37 -0
- docs/_build/html/_sources/fastfusion.frontend.rst.txt +70 -0
- docs/_build/html/_sources/fastfusion.frontend.workload.rst.txt +21 -0
- docs/_build/html/_sources/fastfusion.mapper.FFM.rst.txt +37 -0
- docs/_build/html/_sources/fastfusion.mapper.rst.txt +18 -0
- docs/_build/html/_sources/fastfusion.rst.txt +20 -0
- docs/_build/html/_sources/fastfusion.util.rst.txt +21 -0
- docs/_build/html/_sources/index.rst.txt +87 -0
- docs/_build/html/_sources/modules.rst.txt +7 -0
- docs/_build/html/_sources/notes/citation.rst.txt +45 -0
- docs/_build/html/_sources/notes/definitions.rst.txt +43 -0
- docs/_build/html/_sources/notes/faqs.rst.txt +39 -0
- docs/_build/html/_sources/notes/modeling/accelerator_energy_latency.rst.txt +72 -0
- docs/_build/html/_sources/notes/modeling/component_energy_area.rst.txt +96 -0
- docs/_build/html/_sources/notes/modeling/mapping.rst.txt +100 -0
- docs/_build/html/_sources/notes/modeling.rst.txt +33 -0
- docs/_build/html/_sources/notes/parsing/arithmetic_parsing.rst.txt +136 -0
- docs/_build/html/_sources/notes/parsing/setexpressions.rst.txt +63 -0
- docs/_build/html/_sources/notes/parsing/yaml_parsing.rst.txt +176 -0
- docs/_build/html/_sources/notes/quickstart_and_installation.rst.txt +9 -0
- docs/_build/html/_sources/notes/spec/architecture.rst.txt +133 -0
- docs/_build/html/_sources/notes/spec/mapping.rst.txt +12 -0
- docs/_build/html/_sources/notes/spec/workload.rst.txt +83 -0
- docs/_build/html/_sources/notes/spec.rst.txt +36 -0
- docs/source/_ext/include_attrs.py +213 -0
- docs/source/_ext/include_docstring.py +364 -0
- docs/source/_ext/include_functions.py +154 -0
- docs/source/_ext/include_notebook.py +131 -0
- docs/source/_ext/include_yaml.py +119 -0
- docs/source/_ext/inherited_attributes.py +222 -0
- docs/source/_ext/paths.py +4 -0
- docs/source/conf.py +79 -0
- examples/arches/compute_in_memory/_include.yaml +74 -0
- examples/arches/compute_in_memory/_include_functions.py +229 -0
- examples/arches/compute_in_memory/_load_spec.py +57 -0
- examples/arches/compute_in_memory/components/c2c_multiplier.py +181 -0
- examples/arches/compute_in_memory/components/dac_c2c_r2r.py +605 -0
- examples/arches/compute_in_memory/components/misc.py +195 -0
- examples/arches/compute_in_memory/components/util/bit_functions.py +51 -0
- examples/arches/compute_in_memory/components/zero_comparator.py +92 -0
- examples/arches/compute_in_memory/isaac.yaml +233 -0
- examples/arches/compute_in_memory/memory_cells/ecram_demo.yaml +63 -0
- examples/arches/compute_in_memory/memory_cells/rram_example.yaml +63 -0
- examples/arches/compute_in_memory/memory_cells/rram_isaac_isca_2016.yaml +64 -0
- examples/arches/compute_in_memory/memory_cells/rram_neurosim_default.yaml +63 -0
- examples/arches/compute_in_memory/memory_cells/rram_raella_isca_2023.yaml +70 -0
- examples/arches/compute_in_memory/memory_cells/rram_wan_nature_2022.yaml +63 -0
- examples/arches/compute_in_memory/memory_cells/sram_colonnade_jssc_2021.yaml +63 -0
- examples/arches/compute_in_memory/memory_cells/sram_example.yaml +63 -0
- examples/arches/compute_in_memory/memory_cells/sram_jia_jssc_2020.yaml +63 -0
- examples/arches/compute_in_memory/memory_cells/sram_sinangil_jssc_2021.yaml +63 -0
- examples/arches/compute_in_memory/memory_cells/sram_wang_vlsi_2022.yaml +63 -0
- examples/arches/compute_in_memory/wang_vlsi_2022.yaml +289 -0
- examples/arches/eyeriss.yaml +68 -0
- examples/arches/fanout_variations/at_glb.yaml +31 -0
- examples/arches/fanout_variations/at_glb_with_fanout_node.yaml +34 -0
- examples/arches/fanout_variations/at_mac.yaml +31 -0
- examples/arches/fanout_variations/at_mac_with_constraints.yaml +38 -0
- examples/arches/fanout_variations/at_mac_with_fanout_node.yaml +34 -0
- examples/arches/nvdla.yaml +47 -0
- examples/arches/simple.yaml +28 -0
- examples/arches/tpu_v4i.yaml +67 -0
- examples/mappings/unfused_matmuls_to_simple.yaml +33 -0
- examples/misc/component_annotated.yaml +33 -0
- examples/workloads/gpt3_6.7B.yaml +124 -0
- examples/workloads/matmuls.yaml +20 -0
- examples/workloads/mobilenet_28.yaml +81 -0
- examples/workloads/mobilenet_various_separate.yaml +106 -0
- examples/workloads/three_matmuls_annotated.yaml +59 -0
- notebooks/.ipynb_checkpoints/fastfusion_arch_study_michael-checkpoint.ipynb +359 -0
- notebooks/compute_in_memory/_scripts.py +339 -0
- notebooks/compute_in_memory/isaac.guide.ipynb +270 -0
- notebooks/compute_in_memory/wang_vlsi_2022.ipynb +602 -0
- notebooks/paths.py +4 -0
- notebooks/tutorials/.ipynb_checkpoints/1_FFM-checkpoint.ipynb +3110 -0
- notebooks/tutorials/FFM.ipynb +3498 -0
- notebooks/tutorials/_include.py +48 -0
- notebooks/tutorials/component_energy_area.ipynb +363 -0
- tests/Q_mapping.yaml +38 -0
- tests/__init__.py +0 -0
- tests/conv.mapping.yaml +27 -0
- tests/conv.workload.yaml +13 -0
- tests/conv_sym.mapping.yaml +43 -0
- tests/copy.mapping.yaml +35 -0
- tests/copy.workload.yaml +15 -0
- tests/distribuffers/__init__.py +0 -0
- tests/distribuffers/multicast/test_cases.yaml +482 -0
- tests/distribuffers/spec/binding/valid_bindings.yaml +97 -0
- tests/distribuffers/spec/distributed.yaml +100 -0
- tests/distribuffers/spec/logical_arch.yaml +32 -0
- tests/distribuffers/spec/physical_arch.yaml +69 -0
- tests/distribuffers/test_binding.py +48 -0
- tests/frontend/__init__.py +0 -0
- tests/frontend/test_mapping_viz.py +52 -0
- tests/mapper/__init__.py +0 -0
- tests/mapper/configs/conv1d/conv1d.mapping.yaml +31 -0
- tests/mapper/configs/conv1d/conv1d.workload.yaml +11 -0
- tests/mapper/configs/two_conv1d/two_conv1d.expected.yaml +38 -0
- tests/mapper/configs/two_conv1d/two_conv1d.mapping.yaml +54 -0
- tests/mapper/configs/two_conv1d/two_conv1d.workload.yaml +19 -0
- tests/mapper/test_mapping_to_isl.py +90 -0
- tests/mapper/test_spatial_reuse_analysis.py +67 -0
- tests/mapper/test_temporal_reuse_analysis.py +56 -0
- tests/mapper/util.py +58 -0
- tests/matmul.mapping.yaml +29 -0
- tests/matmul.workload.yaml +12 -0
- tests/matmul_spatial.mapping.yaml +44 -0
- tests/mha.renames.yaml +65 -0
- tests/mha.workload.yaml +67 -0
- tests/mha.yaml +59 -0
- tests/mha_full.workload.yaml +67 -0
- tests/mobilenet.workload.yaml +35 -0
- tests/mobilenet_long.workload.yaml +64 -0
- tests/pmappingcache.py +24 -0
- tests/processing_stage.arch.yaml +40 -0
- tests/snowcat.arch.yaml +36 -0
- tests/test_ffm_join_pmappings.py +106 -0
- tests/test_ffm_make_pmappings.py +82 -0
- tests/test_ffm_make_tile_shapes.py +49 -0
- tests/test_mapper.py +100 -0
- tests/test_model.py +37 -0
- tests/test_plotting.py +72 -0
- tests/test_processing_stage.py +46 -0
- tests/test_symbolic_model.py +248 -0
- tests/test_workload.py +141 -0
|
@@ -0,0 +1,70 @@
|
|
|
1
|
+
fastfusion.frontend package
|
|
2
|
+
===========================
|
|
3
|
+
|
|
4
|
+
Subpackages
|
|
5
|
+
-----------
|
|
6
|
+
|
|
7
|
+
.. toctree::
|
|
8
|
+
:maxdepth: 4
|
|
9
|
+
|
|
10
|
+
fastfusion.frontend.mapper
|
|
11
|
+
fastfusion.frontend.workload
|
|
12
|
+
|
|
13
|
+
Submodules
|
|
14
|
+
----------
|
|
15
|
+
|
|
16
|
+
fastfusion.frontend.arch module
|
|
17
|
+
-------------------------------
|
|
18
|
+
|
|
19
|
+
.. automodule:: fastfusion.frontend.arch
|
|
20
|
+
:members:
|
|
21
|
+
:show-inheritance:
|
|
22
|
+
:undoc-members:
|
|
23
|
+
|
|
24
|
+
fastfusion.frontend.config module
|
|
25
|
+
---------------------------------
|
|
26
|
+
|
|
27
|
+
.. automodule:: fastfusion.frontend.config
|
|
28
|
+
:members:
|
|
29
|
+
:show-inheritance:
|
|
30
|
+
:undoc-members:
|
|
31
|
+
|
|
32
|
+
fastfusion.frontend.mapping module
|
|
33
|
+
----------------------------------
|
|
34
|
+
|
|
35
|
+
.. automodule:: fastfusion.frontend.mapping
|
|
36
|
+
:members:
|
|
37
|
+
:show-inheritance:
|
|
38
|
+
:undoc-members:
|
|
39
|
+
|
|
40
|
+
fastfusion.frontend.renames module
|
|
41
|
+
----------------------------------
|
|
42
|
+
|
|
43
|
+
.. automodule:: fastfusion.frontend.renames
|
|
44
|
+
:members:
|
|
45
|
+
:show-inheritance:
|
|
46
|
+
:undoc-members:
|
|
47
|
+
|
|
48
|
+
fastfusion.frontend.spec module
|
|
49
|
+
-------------------------------
|
|
50
|
+
|
|
51
|
+
.. automodule:: fastfusion.frontend.spec
|
|
52
|
+
:members:
|
|
53
|
+
:show-inheritance:
|
|
54
|
+
:undoc-members:
|
|
55
|
+
|
|
56
|
+
fastfusion.frontend.variables module
|
|
57
|
+
------------------------------------
|
|
58
|
+
|
|
59
|
+
.. automodule:: fastfusion.frontend.variables
|
|
60
|
+
:members:
|
|
61
|
+
:show-inheritance:
|
|
62
|
+
:undoc-members:
|
|
63
|
+
|
|
64
|
+
Module contents
|
|
65
|
+
---------------
|
|
66
|
+
|
|
67
|
+
.. automodule:: fastfusion.frontend
|
|
68
|
+
:members:
|
|
69
|
+
:show-inheritance:
|
|
70
|
+
:undoc-members:
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
fastfusion.frontend.workload package
|
|
2
|
+
====================================
|
|
3
|
+
|
|
4
|
+
Submodules
|
|
5
|
+
----------
|
|
6
|
+
|
|
7
|
+
fastfusion.frontend.workload.workload module
|
|
8
|
+
--------------------------------------------
|
|
9
|
+
|
|
10
|
+
.. automodule:: fastfusion.frontend.workload.workload
|
|
11
|
+
:members:
|
|
12
|
+
:show-inheritance:
|
|
13
|
+
:undoc-members:
|
|
14
|
+
|
|
15
|
+
Module contents
|
|
16
|
+
---------------
|
|
17
|
+
|
|
18
|
+
.. automodule:: fastfusion.frontend.workload
|
|
19
|
+
:members:
|
|
20
|
+
:show-inheritance:
|
|
21
|
+
:undoc-members:
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
fastfusion.mapper.FFM package
|
|
2
|
+
=============================
|
|
3
|
+
|
|
4
|
+
Submodules
|
|
5
|
+
----------
|
|
6
|
+
|
|
7
|
+
fastfusion.mapper.FFM.main module
|
|
8
|
+
---------------------------------
|
|
9
|
+
|
|
10
|
+
.. automodule:: fastfusion.mapper.FFM.main
|
|
11
|
+
:members:
|
|
12
|
+
:show-inheritance:
|
|
13
|
+
:undoc-members:
|
|
14
|
+
|
|
15
|
+
fastfusion.mapper.FFM.mappings module
|
|
16
|
+
-------------------------------------
|
|
17
|
+
|
|
18
|
+
.. automodule:: fastfusion.mapper.FFM.mappings
|
|
19
|
+
:members:
|
|
20
|
+
:show-inheritance:
|
|
21
|
+
:undoc-members:
|
|
22
|
+
|
|
23
|
+
fastfusion.mapper.FFM.pmappings module
|
|
24
|
+
--------------------------------------
|
|
25
|
+
|
|
26
|
+
.. automodule:: fastfusion.mapper.FFM.pmappings
|
|
27
|
+
:members:
|
|
28
|
+
:show-inheritance:
|
|
29
|
+
:undoc-members:
|
|
30
|
+
|
|
31
|
+
Module contents
|
|
32
|
+
---------------
|
|
33
|
+
|
|
34
|
+
.. automodule:: fastfusion.mapper.FFM
|
|
35
|
+
:members:
|
|
36
|
+
:show-inheritance:
|
|
37
|
+
:undoc-members:
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
fastfusion.mapper package
|
|
2
|
+
=========================
|
|
3
|
+
|
|
4
|
+
Subpackages
|
|
5
|
+
-----------
|
|
6
|
+
|
|
7
|
+
.. toctree::
|
|
8
|
+
:maxdepth: 4
|
|
9
|
+
|
|
10
|
+
fastfusion.mapper.FFM
|
|
11
|
+
|
|
12
|
+
Module contents
|
|
13
|
+
---------------
|
|
14
|
+
|
|
15
|
+
.. automodule:: fastfusion.mapper
|
|
16
|
+
:members:
|
|
17
|
+
:show-inheritance:
|
|
18
|
+
:undoc-members:
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
fastfusion package
|
|
2
|
+
==================
|
|
3
|
+
|
|
4
|
+
Subpackages
|
|
5
|
+
-----------
|
|
6
|
+
|
|
7
|
+
.. toctree::
|
|
8
|
+
:maxdepth: 4
|
|
9
|
+
|
|
10
|
+
fastfusion.frontend
|
|
11
|
+
fastfusion.mapper
|
|
12
|
+
fastfusion.util
|
|
13
|
+
|
|
14
|
+
Module contents
|
|
15
|
+
---------------
|
|
16
|
+
|
|
17
|
+
.. automodule:: fastfusion
|
|
18
|
+
:members:
|
|
19
|
+
:show-inheritance:
|
|
20
|
+
:undoc-members:
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
fastfusion.util package
|
|
2
|
+
=======================
|
|
3
|
+
|
|
4
|
+
Submodules
|
|
5
|
+
----------
|
|
6
|
+
|
|
7
|
+
fastfusion.util.parallel module
|
|
8
|
+
-------------------------------
|
|
9
|
+
|
|
10
|
+
.. automodule:: fastfusion.util.parallel
|
|
11
|
+
:members:
|
|
12
|
+
:show-inheritance:
|
|
13
|
+
:undoc-members:
|
|
14
|
+
|
|
15
|
+
Module contents
|
|
16
|
+
---------------
|
|
17
|
+
|
|
18
|
+
.. automodule:: fastfusion.util
|
|
19
|
+
:members:
|
|
20
|
+
:show-inheritance:
|
|
21
|
+
:undoc-members:
|
|
@@ -0,0 +1,87 @@
|
|
|
1
|
+
FastFusion
|
|
2
|
+
==========
|
|
3
|
+
|
|
4
|
+
FastFusion is a framework to model tensor algebra accelerators. It includes flexible,
|
|
5
|
+
user-defined specifications for components, architectures, and workloads, and, given
|
|
6
|
+
these specifications, quickly finds optimal fused mappings to program the workloads onto
|
|
7
|
+
the architectures.
|
|
8
|
+
|
|
9
|
+
FastFusion is based on multiple other projects. If you use FastFusion in your work,
|
|
10
|
+
please refer to :doc:`notes/citation` for how to cite the relevant projects.
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
This page includes the following:
|
|
14
|
+
|
|
15
|
+
.. contents::
|
|
16
|
+
:depth: 1
|
|
17
|
+
:local:
|
|
18
|
+
:backlinks: none
|
|
19
|
+
|
|
20
|
+
Installation
|
|
21
|
+
------------
|
|
22
|
+
|
|
23
|
+
For native installation, install the package from PyPI:
|
|
24
|
+
|
|
25
|
+
.. code-block:: bash
|
|
26
|
+
|
|
27
|
+
pip install fastfusion
|
|
28
|
+
|
|
29
|
+
Examples
|
|
30
|
+
--------
|
|
31
|
+
|
|
32
|
+
Example notebooks can be found by cloning the repository and navigating to the
|
|
33
|
+
``notebooks/examples`` directory.
|
|
34
|
+
|
|
35
|
+
.. code-block:: bash
|
|
36
|
+
|
|
37
|
+
git clone https://github.com/Accelergy-Project/fastfusion.git
|
|
38
|
+
cd fastfusion/notebooks/examples
|
|
39
|
+
jupyter notebook
|
|
40
|
+
|
|
41
|
+
Additionally, example input files can be found in the ``examples`` directory.
|
|
42
|
+
|
|
43
|
+
.. code-block:: bash
|
|
44
|
+
|
|
45
|
+
git clone https://github.com/Accelergy-Project/fastfusion.git
|
|
46
|
+
cd fastfusion/examples
|
|
47
|
+
ls
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
Documentation Overview
|
|
51
|
+
----------------------
|
|
52
|
+
|
|
53
|
+
Documentation is organized into the following sections:
|
|
54
|
+
|
|
55
|
+
- :doc:`Input Specifications <notes/spec>` - Overview of the inputs to fastfusion,
|
|
56
|
+
including specifications of architectures, workloads, and mappings.
|
|
57
|
+
- :doc:`Modeling <notes/modeling>` - How FastFusion models the energy, area, and latency
|
|
58
|
+
of an accelerator running a workload.
|
|
59
|
+
- :doc:`Citation <notes/citation>` - How to cite FastFusion in your work
|
|
60
|
+
- :doc:`Definitions <notes/definitions>` - Definitions of key concepts in FastFusion
|
|
61
|
+
- :doc:`Parsing <notes/parsing>` - Parsing of input specifications
|
|
62
|
+
- :doc:`Frequently Asked Questions <notes/faqs>` - Frequently asked questions about FastFusion
|
|
63
|
+
|
|
64
|
+
API Reference
|
|
65
|
+
-------------
|
|
66
|
+
|
|
67
|
+
The complete API reference is available in the :doc:`modules` section, which includes:
|
|
68
|
+
|
|
69
|
+
- :doc:`fastfusion.frontend <fastfusion.frontend>` - The input specifications for fastfusion
|
|
70
|
+
- :doc:`fastfusion.mapper <fastfusion.mapper>` - Algorithms that map workloads onto architectures
|
|
71
|
+
- :doc:`fastfusion.util <fastfusion.util>` - Utility functions and helpers
|
|
72
|
+
|
|
73
|
+
For detailed API documentation, see the :doc:`modules` section.
|
|
74
|
+
|
|
75
|
+
.. toctree::
|
|
76
|
+
:maxdepth: 2
|
|
77
|
+
:caption: API Reference
|
|
78
|
+
:hidden:
|
|
79
|
+
|
|
80
|
+
modules
|
|
81
|
+
|
|
82
|
+
.. toctree::
|
|
83
|
+
:maxdepth: 1
|
|
84
|
+
:caption: Documentation
|
|
85
|
+
:glob:
|
|
86
|
+
|
|
87
|
+
notes/*
|
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
Citing This Work
|
|
2
|
+
================
|
|
3
|
+
|
|
4
|
+
**Please cite all of the following papers if you use this work.** This work is the
|
|
5
|
+
combination of the following:
|
|
6
|
+
|
|
7
|
+
- **CiMLoop**: The architecture and component specification.
|
|
8
|
+
- **Fast & Fusiest**: The multi-Einsum mapper.
|
|
9
|
+
- **LoopTree**: The mapping specification.
|
|
10
|
+
- **LoopForest**: The mapspace specification.
|
|
11
|
+
- **Turbo-Charged**: The single-Einsum mapper (and an essential first step for Fast &
|
|
12
|
+
Fusiest).
|
|
13
|
+
|
|
14
|
+
They are available as the following:
|
|
15
|
+
|
|
16
|
+
.. code-block:: latex
|
|
17
|
+
|
|
18
|
+
\cite{cimloop, fast_fusiest, turbo_charged, loop_tree, loopforest}
|
|
19
|
+
|
|
20
|
+
.. code-block:: bibtex
|
|
21
|
+
|
|
22
|
+
@INPROCEEDINGS{cimloop,
|
|
23
|
+
author={Andrulis, Tanner and Emer, Joel S. and Sze, Vivienne},
|
|
24
|
+
booktitle={2024 IEEE International Symposium on Performance Analysis of Systems and Software (ISPASS)},
|
|
25
|
+
title={CiMLoop: A Flexible, Accurate, and Fast Compute-In-Memory Modeling Tool},
|
|
26
|
+
year={2024},
|
|
27
|
+
volume={},
|
|
28
|
+
number={},
|
|
29
|
+
pages={10-23},
|
|
30
|
+
keywords={Performance evaluation;Accuracy;Computational modeling;Computer architecture;Artificial neural networks;In-memory computing;Data models;Compute-In-Memory;Processing-In-Memory;Analog;Deep Neural Networks;Systems;Hardware;Modeling;Open-Source},
|
|
31
|
+
doi={10.1109/ISPASS61541.2024.00012}}
|
|
32
|
+
|
|
33
|
+
@INPROCEEDINGS{loop_tree,
|
|
34
|
+
author={Gilbert, Michael and Wu, Yannan Nellie and Parashar, Angshuman and Sze, Vivienne and Emer, Joel S.},
|
|
35
|
+
booktitle={2023 IEEE International Symposium on Performance Analysis of Systems and Software (ISPASS)},
|
|
36
|
+
title={LoopTree: Enabling Exploration of Fused-layer Dataflow Accelerators},
|
|
37
|
+
year={2023},
|
|
38
|
+
volume={},
|
|
39
|
+
number={},
|
|
40
|
+
pages={316-318},
|
|
41
|
+
keywords={Deep learning;Analytical models;Systematics;Neural networks;Bandwidth;Software;Energy efficiency;analytical modeling;layer fusion;accelerators},
|
|
42
|
+
doi={10.1109/ISPASS57527.2023.00038}}
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
TODO: More citations
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
Definitions
|
|
2
|
+
===========
|
|
3
|
+
|
|
4
|
+
Action
|
|
5
|
+
An action is something performed by a hardware unit. For example, a read or a compute.
|
|
6
|
+
|
|
7
|
+
Mapping
|
|
8
|
+
A *mapping* is a schedule that maps operations and data movement onto the hardware.
|
|
9
|
+
|
|
10
|
+
Component
|
|
11
|
+
A component is a hardware unit in the architecture. For example, a memory or a compute
|
|
12
|
+
unit.
|
|
13
|
+
|
|
14
|
+
Dataflow
|
|
15
|
+
The order in which a mapping iterates over tiles, noting that tiles may be abstract
|
|
16
|
+
before the mapping is fully defined. See :ref:`Tile`.
|
|
17
|
+
|
|
18
|
+
Dataplacement
|
|
19
|
+
Which tile(s) are stored in each memory level of the accelerator, and for what time
|
|
20
|
+
period, noting that tiles and time periods may be abstract before the mapping is fully
|
|
21
|
+
defined. See :ref:`Tile`.
|
|
22
|
+
|
|
23
|
+
Pmapping
|
|
24
|
+
A *partial mapping*, or *pmapping*, is a mapping of a subset of the workload to the
|
|
25
|
+
hardware.
|
|
26
|
+
|
|
27
|
+
Pmapping Template
|
|
28
|
+
A *pmapping template* is a template for a pmapping. It includes all storage nodes
|
|
29
|
+
(dataplacement) and loop nodes (dataflow), but does not have loop bounds defined (tile
|
|
30
|
+
shapes).
|
|
31
|
+
|
|
32
|
+
Reuse
|
|
33
|
+
Reuse occurs when a piece of data is used in multiple computations, but fetched
|
|
34
|
+
fewer times from some memory. For example, we may fetch a piece of data from DRAM to
|
|
35
|
+
on-chip memory once, then use it in ten computations. This would incur nine reuses of
|
|
36
|
+
the piece of data.
|
|
37
|
+
|
|
38
|
+
Reuse Opportunity
|
|
39
|
+
Reuse opportunity is when a piece of data is used multiple times by the workload. It
|
|
40
|
+
may or may not be turned into reuse if the hardware successfully leverages it.
|
|
41
|
+
|
|
42
|
+
Tile
|
|
43
|
+
TODO
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
Frequently Asked Questions
|
|
2
|
+
==========================
|
|
3
|
+
|
|
4
|
+
.. contents::
|
|
5
|
+
:depth: 1
|
|
6
|
+
:local:
|
|
7
|
+
:backlinks: none
|
|
8
|
+
|
|
9
|
+
What unit is ... specified in?
|
|
10
|
+
------------------------------
|
|
11
|
+
We use un-prefixed units for all values. Joules, seconds, meters, square meters, bits,
|
|
12
|
+
etc.
|
|
13
|
+
|
|
14
|
+
Why are some attributes underscored?
|
|
15
|
+
------------------------------------
|
|
16
|
+
|
|
17
|
+
.. _underscore-discussion:
|
|
18
|
+
|
|
19
|
+
Underscore prefixes are used to indicate that a value is recognized by the frontend.
|
|
20
|
+
They are used in places where there may be a mix of recognized and unrecognized values,
|
|
21
|
+
such as in a :py:class:`~fastfusion.frontend.arch.Component` ``attributes`` dictionary,
|
|
22
|
+
where ``attributes`` may contain recognized fields (such as
|
|
23
|
+
:py:obj:`~fastfusion.frontend.arch.ComponentAttributes.energy`) and
|
|
24
|
+
unrecognized fields (a field that may be used by `hwcomponents
|
|
25
|
+
<https://github.com/Accelergy-Project/hwcomponents>`_, but not this package).
|
|
26
|
+
|
|
27
|
+
When a value is underscored, this package will check whether it is recognized and raise
|
|
28
|
+
an error if it is not. In places where fields may or may not be recognized (e.g.,
|
|
29
|
+
:py:class:`~fastfusion.frontend.arch.ComponentAttributes`,
|
|
30
|
+
:py:class:`~fastfusion.frontend.arch.ActionArguments`), we recommend
|
|
31
|
+
underscore-prefixing all fields that are going to be used by this package.
|
|
32
|
+
|
|
33
|
+
As a result, you may see attributes dictionaries that have a mix of underscored and
|
|
34
|
+
non-underscored fields. The underscored fields will be used by this package, and the
|
|
35
|
+
non-underscored fields will only be used by other parsers of the object (such as
|
|
36
|
+
`hwcomponents <https://github.com/Accelergy-Project/hwcomponents>`_).
|
|
37
|
+
|
|
38
|
+
When an object is initialized with underscore-prefixed fields, all underscores are
|
|
39
|
+
dropped after checking validity.
|
|
@@ -0,0 +1,72 @@
|
|
|
1
|
+
Accelerator Energy, Area, and Latency
|
|
2
|
+
=====================================
|
|
3
|
+
|
|
4
|
+
.. _accelerator-energy-latency:
|
|
5
|
+
|
|
6
|
+
To calculate energy and latency, we first need to look at the number of actions incurred
|
|
7
|
+
by each :py:class:`~fastfusion.frontend.arch.Component` in the architecture.
|
|
8
|
+
|
|
9
|
+
Calculating Number of Actions from a Mapping
|
|
10
|
+
--------------------------------------------
|
|
11
|
+
|
|
12
|
+
.. _calculating-num-actions:
|
|
13
|
+
|
|
14
|
+
Except for :py:class:`~fastfusion.frontend.arch.Compute`\ components (whose number of
|
|
15
|
+
compute actions, barring recomputation, depends only on workload), the number of actions
|
|
16
|
+
incurred by most :py:class:`~fastfusion.frontend.arch.Component`\ s depends on the
|
|
17
|
+
component type, the workload, and the mapping.
|
|
18
|
+
|
|
19
|
+
For :py:class:`~fastfusion.frontend.arch.Memory` and
|
|
20
|
+
:py:class:`~fastfusion.frontend.arch.ProcessingStage` components, the number of actions
|
|
21
|
+
depends on the number of accesses to the component. They may be accessed in two ways:
|
|
22
|
+
|
|
23
|
+
- ``read``: The component is read from a lower-level component, or output values are read
|
|
24
|
+
up to a higher-level component.
|
|
25
|
+
- ``write``: The component is written to a lower-level component, or input values are
|
|
26
|
+
written from a higher-level component.
|
|
27
|
+
|
|
28
|
+
The number of actions incurred by accesses for each tensor is equal to the number of
|
|
29
|
+
values accessed times the datawidth of the tensor (determined by that component's
|
|
30
|
+
:py:class:`~fastfusion.frontend.arch.TensorHolderAttributes`), divided by the
|
|
31
|
+
:py:class:`~fastfusion.frontend.arch.ActionArguments` ``bits_per_action`` attribute. For
|
|
32
|
+
example, if 1024 values are accessed with a datawidth of 16 bits and ``bits_per_action``
|
|
33
|
+
is 32, then 1024 * 16 / 32 = 512 actions are incurred.
|
|
34
|
+
|
|
35
|
+
Read+Modify+Writes (RMWs) to a component are counted as a read and a write. The first
|
|
36
|
+
read of output data is skipped because the value has not been written yet.
|
|
37
|
+
|
|
38
|
+
By default, the ``datawidth`` and ``bits_per_action`` attributes are set to 1.
|
|
39
|
+
Generally, it works to leave these as 1. For example:
|
|
40
|
+
|
|
41
|
+
- If ``bits_per_action`` is 1, then each action accesses one bit, so we can define
|
|
42
|
+
actions in terms of bits accessed
|
|
43
|
+
- If ``datawidth`` is 1 and ``bits_per_action`` is 1, then each action accesses one
|
|
44
|
+
value, so we can define actions in terms of values accessed. Additionally, ``size``
|
|
45
|
+
will then be in terms of number of values that can be held, rather than number of
|
|
46
|
+
bits.
|
|
47
|
+
|
|
48
|
+
The latter case is the default, and you may often see ``datawidth`` and
|
|
49
|
+
``bits_per_action`` un-set, ``size`` set to the number of values in the tensor, and
|
|
50
|
+
actions defined in terms of values accessed rather than bits.
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
Calculating Latency from a Pmapping
|
|
54
|
+
-----------------------------------
|
|
55
|
+
|
|
56
|
+
The :py:obj:`~fastfusion.frontend.arch.ComponentAttributes.latency` of a component, defined
|
|
57
|
+
in the class's ``attributes.latency`` field, is a Python expression that is evaluated
|
|
58
|
+
using the component's actions.
|
|
59
|
+
|
|
60
|
+
The :py:obj:`~fastfusion.frontend.arch.ComponentAttributes.latency` field is
|
|
61
|
+
:docstring-lower:`fastfusion.frontend.arch.ComponentAttributes.latency`
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
Calculating Area and Leak Power
|
|
65
|
+
-------------------------------
|
|
66
|
+
|
|
67
|
+
After :ref:`component-modeling` is completed, we can get area with the
|
|
68
|
+
:py:meth:`~fastfusion.frontend.arch.Arch.per_component_total_area` and
|
|
69
|
+
:py:meth:`~fastfusion.frontend.arch.Arch.total_area` methods. Similarly, we can get
|
|
70
|
+
leak power with the
|
|
71
|
+
:py:meth:`~fastfusion.frontend.arch.Arch.per_component_total_leak_power` and
|
|
72
|
+
:py:meth:`~fastfusion.frontend.arch.Arch.total_leak_power` methods.
|
|
@@ -0,0 +1,96 @@
|
|
|
1
|
+
Component Energy and Area
|
|
2
|
+
=========================
|
|
3
|
+
|
|
4
|
+
.. _component-modeling:
|
|
5
|
+
|
|
6
|
+
The energy and area of components in the :ref:`architecture <architecture>` can either be
|
|
7
|
+
specified directly, or by calls to the `HWComponents
|
|
8
|
+
<https://github.com/Accelergy-Project/hwcomponents>`_ library.
|
|
9
|
+
|
|
10
|
+
Calculating Energy and Area
|
|
11
|
+
---------------------------
|
|
12
|
+
|
|
13
|
+
Component energy and area calculations will populate the following fields for each
|
|
14
|
+
component. If these fields are pre-specified, then they may be used as input to the
|
|
15
|
+
energy and area calculations.
|
|
16
|
+
|
|
17
|
+
- ``attributes.area``: :docstring:`fastfusion.frontend.arch.Component.attributes.area`
|
|
18
|
+
- ``attributes.leak_power``: :docstring:`fastfusion.frontend.arch.Component.attributes.leak_power`
|
|
19
|
+
- ``actions[<action name>].arguments.energy``: :docstring:`fastfusion.frontend.arch.ActionArguments.energy`
|
|
20
|
+
- ``attributes.total_area``: :docstring:`fastfusion.frontend.arch.Component.attributes.total_area`
|
|
21
|
+
- ``attributes.total_leak_power``: :docstring:`fastfusion.frontend.arch.Component.attributes.total_leak_power`
|
|
22
|
+
- ``energy_area_log``: :docstring:`fastfusion.frontend.arch.Component.energy_area_log`
|
|
23
|
+
- ``component_model``: :docstring:`fastfusion.frontend.arch.Component.component_model`
|
|
24
|
+
|
|
25
|
+
Additionally, the following fields will affect the energy and area calculations:
|
|
26
|
+
|
|
27
|
+
- ``attributes.energy``: :docstring:`fastfusion.frontend.arch.Component.attributes.energy`
|
|
28
|
+
- ``attributes.energy_scale``: :docstring:`fastfusion.frontend.arch.Component.attributes.energy_scale`
|
|
29
|
+
- ``attributes.leak_power_scale``: :docstring:`fastfusion.frontend.arch.Component.attributes.leak_power_scale`
|
|
30
|
+
- ``attributes.area_scale``: :docstring:`fastfusion.frontend.arch.Component.attributes.area_scale`
|
|
31
|
+
- ``actions[<action name>].arguments.energy_scale``: :docstring:`fastfusion.frontend.arch.ActionArguments.energy_scale`
|
|
32
|
+
|
|
33
|
+
The energy and area of all components in the architecture can be calculated by calling
|
|
34
|
+
:py:meth:`~fastfusion.spec.Spec.calculate_component_energy_area`.
|
|
35
|
+
|
|
36
|
+
.. include-notebook:: notebooks/tutorials/component_energy_area.ipynb
|
|
37
|
+
:name: spec_energy_area
|
|
38
|
+
:language: python
|
|
39
|
+
|
|
40
|
+
We can also calculate the energy and area of individual components by calling
|
|
41
|
+
:py:meth:`~fastfusion.arch.Component.calculate_energy_area` on them.
|
|
42
|
+
|
|
43
|
+
.. include-notebook:: notebooks/tutorials/component_energy_area.ipynb
|
|
44
|
+
:name: single_component_energy_area
|
|
45
|
+
:language: python
|
|
46
|
+
|
|
47
|
+
There are additional ``Spec.config`` fields that affect the energy and area
|
|
48
|
+
calculations:
|
|
49
|
+
|
|
50
|
+
.. include-attrs:: fastfusion.frontend.config.Config
|
|
51
|
+
|
|
52
|
+
Specifying Energy and Area
|
|
53
|
+
---------------------------
|
|
54
|
+
|
|
55
|
+
One way to specify the area and energy of each component is to directly set the
|
|
56
|
+
``attributes.area``, ``attributes.leak_power``, or ``actions[<action
|
|
57
|
+
name>].arguments.energy`` fields. The following example from the TPU v4i example
|
|
58
|
+
architecture uses this approach:
|
|
59
|
+
|
|
60
|
+
.. include-yaml:: examples/arches/tpu_v4i_like.arch.yaml
|
|
61
|
+
:startfrom: GlobalBuffer
|
|
62
|
+
:same-indent:
|
|
63
|
+
|
|
64
|
+
If any value is omitted, it will raise an appropriate error when
|
|
65
|
+
:py:meth:`~fastfusion.spec.Spec.calculate_component_energy_area` is called, so you may call this
|
|
66
|
+
function to check whether you've missed anything. ``hwcomponents`` is invoked
|
|
67
|
+
automatically if any of the fields are missing. If you don't want it to be called, then
|
|
68
|
+
you can do one of the following:
|
|
69
|
+
|
|
70
|
+
- If calling :py:meth:`~fastfusion.spec.Spec.calculate_component_energy_area`, then you
|
|
71
|
+
can set ``spec.config.component_models`` and
|
|
72
|
+
``spec.config.use_installed_component_models`` to an empty list and ``False``,
|
|
73
|
+
respectively.
|
|
74
|
+
- If calling :py:meth:`~fastfusion.arch.Component.calculate_energy_area`, then you can set ``models`` to an
|
|
75
|
+
empty list.
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
Using the ``hwcomponents`` Library
|
|
79
|
+
-----------------------------------
|
|
80
|
+
|
|
81
|
+
``hwcomponents`` is invoked automatically when area and energy are not specified. The
|
|
82
|
+
following shows the fields used by ``hwcomponents``:
|
|
83
|
+
|
|
84
|
+
.. include:: ../../../examples/misc/component_annotated.yaml
|
|
85
|
+
:code: yaml
|
|
86
|
+
|
|
87
|
+
When ``hwcomponents`` has been used to calculate the energy and area of a component,
|
|
88
|
+
then the ``component_model`` field will be set to the ``hwcomponents`` model used to
|
|
89
|
+
calculate the energy and area.
|
|
90
|
+
|
|
91
|
+
In addition to looking at the ``energy_area_log`` field, we can further inspect the
|
|
92
|
+
``component_model`` field to see more information about the model.
|
|
93
|
+
|
|
94
|
+
.. include-notebook:: notebooks/tutorials/component_energy_area.ipynb
|
|
95
|
+
:name: hwcomponents
|
|
96
|
+
:language: python
|
|
@@ -0,0 +1,100 @@
|
|
|
1
|
+
Mapping with Fast & Fusiest
|
|
2
|
+
===========================
|
|
3
|
+
|
|
4
|
+
Mapping workloads onto accelerators uses the Fast and Fusiest Mapper (FFM), which
|
|
5
|
+
consists of two parts:
|
|
6
|
+
|
|
7
|
+
- The Turbo-Charged Pmapper: This part makes all Pareto-optimal pmappings for all
|
|
8
|
+
Einsums.
|
|
9
|
+
- Fast and Fusiest Mapper (FFM): This part takes the Pareto-optimal pmappings and joins
|
|
10
|
+
them into full mappings.
|
|
11
|
+
|
|
12
|
+
This document will walk you through how to use FFM to map a workload onto an
|
|
13
|
+
accelerator.
|
|
14
|
+
|
|
15
|
+
This document follows the ``notebooks/tutorials/FFM.ipynb`` notebook.
|
|
16
|
+
|
|
17
|
+
Creating a Spec
|
|
18
|
+
------------------------
|
|
19
|
+
|
|
20
|
+
Before we dive into the mapper, we need to set up a
|
|
21
|
+
:py:class:`~fastfusion.frontend.spec.Spec` object with the input
|
|
22
|
+
spec. We can initialize
|
|
23
|
+
:py:class:`~fastfusion.frontend.spec.Spec` objects from YAML files.
|
|
24
|
+
|
|
25
|
+
.. include-notebook:: notebooks/tutorials/FFM.ipynb
|
|
26
|
+
:name: make_spec
|
|
27
|
+
:language: python
|
|
28
|
+
|
|
29
|
+
We can set optimization metrics for the mapper by setting the ``spec.mapper.ffm.metrics``
|
|
30
|
+
attribute to one of the :py:class:`~fastfusion.mapper.FFM.Metrics` enum values or a
|
|
31
|
+
logical OR (``|``) of multiple values.
|
|
32
|
+
|
|
33
|
+
The following optimization metrics are available:
|
|
34
|
+
|
|
35
|
+
.. include-attrs:: fastfusion.mapper.FFM.Metrics
|
|
36
|
+
|
|
37
|
+
Making Partial Mappings
|
|
38
|
+
-----------------------
|
|
39
|
+
|
|
40
|
+
We call the Turbo-Charged Pmapper with the
|
|
41
|
+
:py:func:`~fastfusion.mapper.FFM.main.make_pmappings` function. This function returns a
|
|
42
|
+
:py:class:`~fastfusion.mapper.FFM.main.MultiEinsumPmappings` object, which contains all
|
|
43
|
+
Pareto-optimal pmappings for all Einsums.
|
|
44
|
+
|
|
45
|
+
.. include-notebook:: notebooks/tutorials/FFM.ipynb
|
|
46
|
+
:name: make_pmappings
|
|
47
|
+
:language: python
|
|
48
|
+
|
|
49
|
+
In this code, there is a ``max_fused_loops`` parameter that makes mapping faster by
|
|
50
|
+
limiting the number of fused loops that can exist in a single pmapping. The
|
|
51
|
+
``spec.mapper.ffm`` object has a variety of knobs that can be used to speed up mapping:
|
|
52
|
+
|
|
53
|
+
.. include-attrs:: fastfusion.frontend.mapper.FFM
|
|
54
|
+
|
|
55
|
+
To help with debugging, the :py:func:`~fastfusion.mapper.FFM.main.make_pmappings`
|
|
56
|
+
function will output all pmapping templates that it generates. A pmapping template is a
|
|
57
|
+
pmapping that has not been filled in with tile shapes, meaning that it is a stack of
|
|
58
|
+
loop nodes and storage nodes with loop bounds left unfilled.
|
|
59
|
+
|
|
60
|
+
If no valid pmappings are found for a given Einsum, it may be helpful to inspect the
|
|
61
|
+
pmapping templates outputted. The
|
|
62
|
+
:py:class:`~fastfusion.mapper.FFM.pmappings.MultiEinsumPmappings` object has additional
|
|
63
|
+
functions that can be used to help with debugging:
|
|
64
|
+
|
|
65
|
+
.. include-functions:: fastfusion.mapper.FFM.pmappings.MultiEinsumPmappings
|
|
66
|
+
|
|
67
|
+
Joining Partial Mappings
|
|
68
|
+
------------------------
|
|
69
|
+
|
|
70
|
+
After we have all Pareto-optimal pmappings for all Einsums, we can join them into full
|
|
71
|
+
mappings with the :py:func:`~fastfusion.mapper.FFM.main.join_pmappings` function. This
|
|
72
|
+
function returns a :py:class:`~fastfusion.mapper.FFM.mappings.Mappings` object, which
|
|
73
|
+
contains all Pareto-optimal mappings found for the given cascade of Einsums.
|
|
74
|
+
|
|
75
|
+
.. include-notebook:: notebooks/tutorials/FFM.ipynb
|
|
76
|
+
:name: join_pmappings
|
|
77
|
+
:language: python
|
|
78
|
+
|
|
79
|
+
Interpreting the Output
|
|
80
|
+
-----------------------
|
|
81
|
+
|
|
82
|
+
The :py:class:`~fastfusion.mapper.FFM.mappings.Mappings` object includes stats for the
|
|
83
|
+
mappings that were found, including, for each pmapping, resource usage and objective
|
|
84
|
+
metrics.
|
|
85
|
+
|
|
86
|
+
To access the stats, we can use the :py:meth:`~fastfusion.mapper.FFM.mappings.Mappings.access`
|
|
87
|
+
method, which will return a :py:class:`~fastfusion.mapper.FFM.mappings.Mappings` object
|
|
88
|
+
with only the columns that match the given key, and with the key removed from the column
|
|
89
|
+
names.
|
|
90
|
+
|
|
91
|
+
For example, if there are three columns ``Total<SEP>Energy``, ``Total<SEP>Area``, and
|
|
92
|
+
``EinsumA<SEP>Energy``, then ``mapping.access("Total")`` will return a Mappings object
|
|
93
|
+
with columns ``Energy`` and ``Area``, and ``mapping.access("Energy")`` will return a
|
|
94
|
+
Mappings object with columns ``Total`` and ``EinsumA``.
|
|
95
|
+
|
|
96
|
+
To render a mapping, we can use the
|
|
97
|
+
:py:meth:`~fastfusion.mapper.FFM.mappings.Mappings.render` method, which will return a
|
|
98
|
+
string representation of the mapping. In a Jupyter notebook, the mapping will render
|
|
99
|
+
automatically if it is the last object in the cell. Note that if there is more than one
|
|
100
|
+
Pareto-optimal mapping, you must index into a single mapping to render it.
|