accelforge 0.0.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of accelforge might be problematic. Click here for more details.
- accelforge/__init__.py +21 -0
- accelforge/_accelerated_imports.py +16 -0
- accelforge/_deprecate/_simanneal/evalmapping.py +271 -0
- accelforge/_deprecate/_simanneal/mapspaceglobals.py +298 -0
- accelforge/_deprecate/_simanneal/simanneal.py +666 -0
- accelforge/_deprecate/_simanneal/tracking.py +105 -0
- accelforge/_deprecate/_simanneal/wrappers.py +218 -0
- accelforge/_deprecate/_simanneal2/__init__.py +7 -0
- accelforge/_deprecate/_simanneal2/simanneal.py +493 -0
- accelforge/_deprecate/_simanneal2/tracking.py +116 -0
- accelforge/_deprecate/compatibility_util.py +181 -0
- accelforge/_deprecate/layerdeduplication/__init__.py +2 -0
- accelforge/_deprecate/layerdeduplication/group_similar_einsums.py +160 -0
- accelforge/_deprecate/layerdeduplication/grouped_einsums.py +84 -0
- accelforge/_deprecate/mapping_filter_tags/__init__.py +2 -0
- accelforge/_deprecate/mapping_filter_tags/ffmt.py +212 -0
- accelforge/_deprecate/mapping_filter_tags/onesplit.py +24 -0
- accelforge/_deprecate/mapping_filter_tags/util.py +24 -0
- accelforge/_deprecate/tags.py +69 -0
- accelforge/_deprecate/viz/__init__.py +0 -0
- accelforge/_deprecate/viz/interactive.py +159 -0
- accelforge/_deprecate/viz/reservationtree.py +307 -0
- accelforge/_deprecate/viz/ski_slope.py +88 -0
- accelforge/_version.py +15 -0
- accelforge/examples.py +39 -0
- accelforge/frontend/__init__.py +10 -0
- accelforge/frontend/_binding.py +129 -0
- accelforge/frontend/_workload_isl/__init__.py +2 -0
- accelforge/frontend/_workload_isl/_isl.py +149 -0
- accelforge/frontend/_workload_isl/_symbolic.py +141 -0
- accelforge/frontend/arch copy.py +1544 -0
- accelforge/frontend/arch.py +1642 -0
- accelforge/frontend/config.py +63 -0
- accelforge/frontend/mapper/__init__.py +5 -0
- accelforge/frontend/mapper/ffm.py +126 -0
- accelforge/frontend/mapper/mapper.py +7 -0
- accelforge/frontend/mapper/metrics.py +30 -0
- accelforge/frontend/mapping/__init__.py +1 -0
- accelforge/frontend/mapping/mapping.py +1736 -0
- accelforge/frontend/model.py +14 -0
- accelforge/frontend/renames.py +150 -0
- accelforge/frontend/spec copy.py +230 -0
- accelforge/frontend/spec.py +301 -0
- accelforge/frontend/variables.py +12 -0
- accelforge/frontend/workload.py +952 -0
- accelforge/mapper/FFM/__init__.py +9 -0
- accelforge/mapper/FFM/_join_pmappings/__init__.py +0 -0
- accelforge/mapper/FFM/_join_pmappings/compatibility.py +653 -0
- accelforge/mapper/FFM/_join_pmappings/compress_pmappings.py +140 -0
- accelforge/mapper/FFM/_join_pmappings/join_pmappings.py +703 -0
- accelforge/mapper/FFM/_join_pmappings/pmapping_dataframe.py +901 -0
- accelforge/mapper/FFM/_join_pmappings/pmapping_group.py +337 -0
- accelforge/mapper/FFM/_make_pmappings/contraints/__init__.py +0 -0
- accelforge/mapper/FFM/_make_pmappings/contraints/constraints.py +360 -0
- accelforge/mapper/FFM/_make_pmappings/make_pmapping_templates/__init__.py +1 -0
- accelforge/mapper/FFM/_make_pmappings/make_pmapping_templates/make_loops.py +373 -0
- accelforge/mapper/FFM/_make_pmappings/make_pmapping_templates/make_pmapping_templates.py +463 -0
- accelforge/mapper/FFM/_make_pmappings/make_pmapping_templates/make_reservations.py +95 -0
- accelforge/mapper/FFM/_make_pmappings/make_pmapping_templates/make_storage_order.py +382 -0
- accelforge/mapper/FFM/_make_pmappings/make_pmapping_templates/make_storages.py +155 -0
- accelforge/mapper/FFM/_make_pmappings/make_pmappings.py +411 -0
- accelforge/mapper/FFM/_make_pmappings/make_pmappings_from_templates/__init__.py +1 -0
- accelforge/mapper/FFM/_make_pmappings/make_pmappings_from_templates/make_pmappings_from_templates.py +407 -0
- accelforge/mapper/FFM/_make_pmappings/make_pmappings_from_templates/make_tile_shapes.py +1681 -0
- accelforge/mapper/FFM/_make_pmappings/make_pmappings_from_templates/run_model.py +170 -0
- accelforge/mapper/FFM/_make_pmappings/make_pmappings_from_templates/symbol_relations.py +174 -0
- accelforge/mapper/FFM/_make_pmappings/pmapper_job.py +282 -0
- accelforge/mapper/FFM/_pareto_df/df_convention.py +273 -0
- accelforge/mapper/FFM/_pareto_df/pareto copy.py +836 -0
- accelforge/mapper/FFM/_pareto_df/pareto.py +508 -0
- accelforge/mapper/FFM/data.py +61 -0
- accelforge/mapper/FFM/main copy.py +236 -0
- accelforge/mapper/FFM/main.py +208 -0
- accelforge/mapper/FFM/mappings.py +510 -0
- accelforge/mapper/FFM/pmappings.py +310 -0
- accelforge/mapper/__init__.py +4 -0
- accelforge/mapper.py +0 -0
- accelforge/model/__init__.py +1 -0
- accelforge/model/_looptree/__init__.py +0 -0
- accelforge/model/_looptree/accesses.py +335 -0
- accelforge/model/_looptree/capacity/__init__.py +1 -0
- accelforge/model/_looptree/capacity/aggregators.py +36 -0
- accelforge/model/_looptree/capacity/capacity.py +47 -0
- accelforge/model/_looptree/energy.py +150 -0
- accelforge/model/_looptree/equivalent_ranks.py +29 -0
- accelforge/model/_looptree/latency/__init__.py +1 -0
- accelforge/model/_looptree/latency/latency.py +98 -0
- accelforge/model/_looptree/latency/memory.py +120 -0
- accelforge/model/_looptree/latency/processors.py +92 -0
- accelforge/model/_looptree/mapping_utilities.py +71 -0
- accelforge/model/_looptree/reuse/__init__.py +4 -0
- accelforge/model/_looptree/reuse/isl/__init__.py +1 -0
- accelforge/model/_looptree/reuse/isl/des.py +59 -0
- accelforge/model/_looptree/reuse/isl/isl_functions.py +374 -0
- accelforge/model/_looptree/reuse/isl/mapping_to_isl/__init__.py +4 -0
- accelforge/model/_looptree/reuse/isl/mapping_to_isl/analyze_mapping.py +297 -0
- accelforge/model/_looptree/reuse/isl/mapping_to_isl/skews_from_mapping.py +236 -0
- accelforge/model/_looptree/reuse/isl/mapping_to_isl/tiling.py +685 -0
- accelforge/model/_looptree/reuse/isl/mapping_to_isl/types.py +188 -0
- accelforge/model/_looptree/reuse/isl/spatial.py +260 -0
- accelforge/model/_looptree/reuse/isl/temporal.py +182 -0
- accelforge/model/_looptree/reuse/symbolic/__init__.py +1 -0
- accelforge/model/_looptree/reuse/symbolic/symbolic copy 2.py +1346 -0
- accelforge/model/_looptree/reuse/symbolic/symbolic copy.py +1408 -0
- accelforge/model/_looptree/reuse/symbolic/symbolic.py +1396 -0
- accelforge/model/_looptree/run.py +122 -0
- accelforge/model/_looptree/types.py +26 -0
- accelforge/model/_looptree/visualization/__init__.py +0 -0
- accelforge/model/_looptree/visualization/occupancy.py +11 -0
- accelforge/model/main.py +222 -0
- accelforge/plotting/__init__.py +2 -0
- accelforge/plotting/mappings.py +219 -0
- accelforge/plotting/specs.py +57 -0
- accelforge/util/__init__.py +4 -0
- accelforge/util/_base_analysis_types.py +24 -0
- accelforge/util/_basetypes.py +1089 -0
- accelforge/util/_frozenset.py +36 -0
- accelforge/util/_isl.py +29 -0
- accelforge/util/_itertools.py +14 -0
- accelforge/util/_mathfuncs.py +57 -0
- accelforge/util/_parse_expressions.py +339 -0
- accelforge/util/_picklecache.py +32 -0
- accelforge/util/_setexpressions.py +268 -0
- accelforge/util/_sympy/__init__.py +0 -0
- accelforge/util/_sympy/broadcast_max.py +18 -0
- accelforge/util/_visualization.py +112 -0
- accelforge/util/_yaml.py +579 -0
- accelforge/util/parallel.py +193 -0
- accelforge-0.0.1.dist-info/METADATA +64 -0
- accelforge-0.0.1.dist-info/RECORD +258 -0
- accelforge-0.0.1.dist-info/WHEEL +5 -0
- accelforge-0.0.1.dist-info/licenses/LICENSE +19 -0
- accelforge-0.0.1.dist-info/top_level.txt +5 -0
- docs/_build/html/_sources/fastfusion.frontend.mapper.rst.txt +37 -0
- docs/_build/html/_sources/fastfusion.frontend.rst.txt +70 -0
- docs/_build/html/_sources/fastfusion.frontend.workload.rst.txt +21 -0
- docs/_build/html/_sources/fastfusion.mapper.FFM.rst.txt +37 -0
- docs/_build/html/_sources/fastfusion.mapper.rst.txt +18 -0
- docs/_build/html/_sources/fastfusion.rst.txt +20 -0
- docs/_build/html/_sources/fastfusion.util.rst.txt +21 -0
- docs/_build/html/_sources/index.rst.txt +87 -0
- docs/_build/html/_sources/modules.rst.txt +7 -0
- docs/_build/html/_sources/notes/citation.rst.txt +45 -0
- docs/_build/html/_sources/notes/definitions.rst.txt +43 -0
- docs/_build/html/_sources/notes/faqs.rst.txt +39 -0
- docs/_build/html/_sources/notes/modeling/accelerator_energy_latency.rst.txt +72 -0
- docs/_build/html/_sources/notes/modeling/component_energy_area.rst.txt +96 -0
- docs/_build/html/_sources/notes/modeling/mapping.rst.txt +100 -0
- docs/_build/html/_sources/notes/modeling.rst.txt +33 -0
- docs/_build/html/_sources/notes/parsing/arithmetic_parsing.rst.txt +136 -0
- docs/_build/html/_sources/notes/parsing/setexpressions.rst.txt +63 -0
- docs/_build/html/_sources/notes/parsing/yaml_parsing.rst.txt +176 -0
- docs/_build/html/_sources/notes/quickstart_and_installation.rst.txt +9 -0
- docs/_build/html/_sources/notes/spec/architecture.rst.txt +133 -0
- docs/_build/html/_sources/notes/spec/mapping.rst.txt +12 -0
- docs/_build/html/_sources/notes/spec/workload.rst.txt +83 -0
- docs/_build/html/_sources/notes/spec.rst.txt +36 -0
- docs/source/_ext/include_attrs.py +213 -0
- docs/source/_ext/include_docstring.py +364 -0
- docs/source/_ext/include_functions.py +154 -0
- docs/source/_ext/include_notebook.py +131 -0
- docs/source/_ext/include_yaml.py +119 -0
- docs/source/_ext/inherited_attributes.py +222 -0
- docs/source/_ext/paths.py +4 -0
- docs/source/conf.py +79 -0
- examples/arches/compute_in_memory/_include.yaml +74 -0
- examples/arches/compute_in_memory/_include_functions.py +229 -0
- examples/arches/compute_in_memory/_load_spec.py +57 -0
- examples/arches/compute_in_memory/components/c2c_multiplier.py +181 -0
- examples/arches/compute_in_memory/components/dac_c2c_r2r.py +605 -0
- examples/arches/compute_in_memory/components/misc.py +195 -0
- examples/arches/compute_in_memory/components/util/bit_functions.py +51 -0
- examples/arches/compute_in_memory/components/zero_comparator.py +92 -0
- examples/arches/compute_in_memory/isaac.yaml +233 -0
- examples/arches/compute_in_memory/memory_cells/ecram_demo.yaml +63 -0
- examples/arches/compute_in_memory/memory_cells/rram_example.yaml +63 -0
- examples/arches/compute_in_memory/memory_cells/rram_isaac_isca_2016.yaml +64 -0
- examples/arches/compute_in_memory/memory_cells/rram_neurosim_default.yaml +63 -0
- examples/arches/compute_in_memory/memory_cells/rram_raella_isca_2023.yaml +70 -0
- examples/arches/compute_in_memory/memory_cells/rram_wan_nature_2022.yaml +63 -0
- examples/arches/compute_in_memory/memory_cells/sram_colonnade_jssc_2021.yaml +63 -0
- examples/arches/compute_in_memory/memory_cells/sram_example.yaml +63 -0
- examples/arches/compute_in_memory/memory_cells/sram_jia_jssc_2020.yaml +63 -0
- examples/arches/compute_in_memory/memory_cells/sram_sinangil_jssc_2021.yaml +63 -0
- examples/arches/compute_in_memory/memory_cells/sram_wang_vlsi_2022.yaml +63 -0
- examples/arches/compute_in_memory/wang_vlsi_2022.yaml +289 -0
- examples/arches/eyeriss.yaml +68 -0
- examples/arches/fanout_variations/at_glb.yaml +31 -0
- examples/arches/fanout_variations/at_glb_with_fanout_node.yaml +34 -0
- examples/arches/fanout_variations/at_mac.yaml +31 -0
- examples/arches/fanout_variations/at_mac_with_constraints.yaml +38 -0
- examples/arches/fanout_variations/at_mac_with_fanout_node.yaml +34 -0
- examples/arches/nvdla.yaml +47 -0
- examples/arches/simple.yaml +28 -0
- examples/arches/tpu_v4i.yaml +67 -0
- examples/mappings/unfused_matmuls_to_simple.yaml +33 -0
- examples/misc/component_annotated.yaml +33 -0
- examples/workloads/gpt3_6.7B.yaml +124 -0
- examples/workloads/matmuls.yaml +20 -0
- examples/workloads/mobilenet_28.yaml +81 -0
- examples/workloads/mobilenet_various_separate.yaml +106 -0
- examples/workloads/three_matmuls_annotated.yaml +59 -0
- notebooks/.ipynb_checkpoints/fastfusion_arch_study_michael-checkpoint.ipynb +359 -0
- notebooks/compute_in_memory/_scripts.py +339 -0
- notebooks/compute_in_memory/isaac.guide.ipynb +270 -0
- notebooks/compute_in_memory/wang_vlsi_2022.ipynb +602 -0
- notebooks/paths.py +4 -0
- notebooks/tutorials/.ipynb_checkpoints/1_FFM-checkpoint.ipynb +3110 -0
- notebooks/tutorials/FFM.ipynb +3498 -0
- notebooks/tutorials/_include.py +48 -0
- notebooks/tutorials/component_energy_area.ipynb +363 -0
- tests/Q_mapping.yaml +38 -0
- tests/__init__.py +0 -0
- tests/conv.mapping.yaml +27 -0
- tests/conv.workload.yaml +13 -0
- tests/conv_sym.mapping.yaml +43 -0
- tests/copy.mapping.yaml +35 -0
- tests/copy.workload.yaml +15 -0
- tests/distribuffers/__init__.py +0 -0
- tests/distribuffers/multicast/test_cases.yaml +482 -0
- tests/distribuffers/spec/binding/valid_bindings.yaml +97 -0
- tests/distribuffers/spec/distributed.yaml +100 -0
- tests/distribuffers/spec/logical_arch.yaml +32 -0
- tests/distribuffers/spec/physical_arch.yaml +69 -0
- tests/distribuffers/test_binding.py +48 -0
- tests/frontend/__init__.py +0 -0
- tests/frontend/test_mapping_viz.py +52 -0
- tests/mapper/__init__.py +0 -0
- tests/mapper/configs/conv1d/conv1d.mapping.yaml +31 -0
- tests/mapper/configs/conv1d/conv1d.workload.yaml +11 -0
- tests/mapper/configs/two_conv1d/two_conv1d.expected.yaml +38 -0
- tests/mapper/configs/two_conv1d/two_conv1d.mapping.yaml +54 -0
- tests/mapper/configs/two_conv1d/two_conv1d.workload.yaml +19 -0
- tests/mapper/test_mapping_to_isl.py +90 -0
- tests/mapper/test_spatial_reuse_analysis.py +67 -0
- tests/mapper/test_temporal_reuse_analysis.py +56 -0
- tests/mapper/util.py +58 -0
- tests/matmul.mapping.yaml +29 -0
- tests/matmul.workload.yaml +12 -0
- tests/matmul_spatial.mapping.yaml +44 -0
- tests/mha.renames.yaml +65 -0
- tests/mha.workload.yaml +67 -0
- tests/mha.yaml +59 -0
- tests/mha_full.workload.yaml +67 -0
- tests/mobilenet.workload.yaml +35 -0
- tests/mobilenet_long.workload.yaml +64 -0
- tests/pmappingcache.py +24 -0
- tests/processing_stage.arch.yaml +40 -0
- tests/snowcat.arch.yaml +36 -0
- tests/test_ffm_join_pmappings.py +106 -0
- tests/test_ffm_make_pmappings.py +82 -0
- tests/test_ffm_make_tile_shapes.py +49 -0
- tests/test_mapper.py +100 -0
- tests/test_model.py +37 -0
- tests/test_plotting.py +72 -0
- tests/test_processing_stage.py +46 -0
- tests/test_symbolic_model.py +248 -0
- tests/test_workload.py +141 -0
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
How Modeling Works
|
|
2
|
+
==================
|
|
3
|
+
|
|
4
|
+
.. _accelerator-modeling:
|
|
5
|
+
|
|
6
|
+
Modeling calculates the energy, area, and latency of an architecture running a given
|
|
7
|
+
workload. This is done in three steps:
|
|
8
|
+
|
|
9
|
+
1. **Per-Component Energy, Area, and Leakage**: This step models the area and leakage
|
|
10
|
+
power of each :py:class:`~fastfusion.frontend.arch.Component` in the architecture.
|
|
11
|
+
It then generates *per-action energy*, which is used by later steps in the model to
|
|
12
|
+
find the energy of performing hardware
|
|
13
|
+
:py:class:`~fastfusion.frontend.arch.ComponentAction`\ s.
|
|
14
|
+
|
|
15
|
+
2. **Mapping the Workload onto the Accelerator**: This step generates mappings
|
|
16
|
+
(:py:class:`~fastfusion.frontend.mapping.Mapping`\ s) that map the workload onto the
|
|
17
|
+
hardware.
|
|
18
|
+
|
|
19
|
+
3. **Modeling the Energy, Area, and Latency of the Mapping**: This step looks at the
|
|
20
|
+
full mapping and calculates the number of hardware actions that occur, using it to
|
|
21
|
+
total the energy and area of the accelerator.
|
|
22
|
+
|
|
23
|
+
In this package, the mapping and modeling steps are connected, letting the mapper
|
|
24
|
+
quickly find mappings that minimize the energy and latency of the accelerator.
|
|
25
|
+
|
|
26
|
+
These steps are detailed in the following sections:
|
|
27
|
+
|
|
28
|
+
.. toctree::
|
|
29
|
+
:maxdepth: 1
|
|
30
|
+
|
|
31
|
+
modeling/component_energy_area
|
|
32
|
+
modeling/accelerator_energy_latency
|
|
33
|
+
modeling/mapping
|
|
@@ -0,0 +1,136 @@
|
|
|
1
|
+
Arithmetic and Parsing
|
|
2
|
+
======================
|
|
3
|
+
|
|
4
|
+
Objects can include expressions that are parsed when the
|
|
5
|
+
:py:class:`~fastfusion.frontend.spec.Spec` is parsed. Parsing occurs when the
|
|
6
|
+
:py:class:`~fastfusion.frontend.spec.Spec` is going to be used to model the energy, area,
|
|
7
|
+
or latency of an accelerator, such as when the
|
|
8
|
+
:py:func:`~fastfusion.frontend.spec.Spec.calculate_component_energy_area` method is
|
|
9
|
+
called.
|
|
10
|
+
|
|
11
|
+
To-be-parsed expressions can include Python code, and supported
|
|
12
|
+
operations include many standard library functions (*e.g.,* ``range``, ``min``) and
|
|
13
|
+
functions from the ``math`` standard library (*e.g.,* ``log2``, ``ceil``).
|
|
14
|
+
|
|
15
|
+
The scope available for parsing includes the following in order of increasing
|
|
16
|
+
precedence:
|
|
17
|
+
|
|
18
|
+
- Variables defined in a top-level :py:class:`~fastfusion.frontend.variables.Variables`
|
|
19
|
+
object.
|
|
20
|
+
- Variables defined in outer-level YAML objects. Dictionary keys can be referenced by
|
|
21
|
+
names, and list entries by index. The dot syntax can be used to access dictionaries;
|
|
22
|
+
for example, ``x.y.z`` is equivalent to ``outer_scope["x"]["y"]["z"]``.
|
|
23
|
+
- Variables defined in the current YAML object. Dictionary keys may reference each other
|
|
24
|
+
as long as references are not cyclic.
|
|
25
|
+
|
|
26
|
+
The following is an example of valid parsed data:
|
|
27
|
+
|
|
28
|
+
.. code-block:: yaml
|
|
29
|
+
|
|
30
|
+
variables:
|
|
31
|
+
a: 123
|
|
32
|
+
b: a + 5
|
|
33
|
+
c: min(b, 3)
|
|
34
|
+
d: sum(y for y in range(1, 10))
|
|
35
|
+
|
|
36
|
+
# In some later scope
|
|
37
|
+
... outer_scope:
|
|
38
|
+
x: 123
|
|
39
|
+
y: a + x # Reference top-level variables
|
|
40
|
+
inner_scope:
|
|
41
|
+
a: 3 # Override outer scope
|
|
42
|
+
b: outer_scope.x
|
|
43
|
+
# Statements can be out-of-order if not cyclic referencing
|
|
44
|
+
first_item: second_item
|
|
45
|
+
second_item: 3
|
|
46
|
+
|
|
47
|
+
Additionally, values can be set directly in Python code. For example:
|
|
48
|
+
|
|
49
|
+
.. code-block:: python
|
|
50
|
+
|
|
51
|
+
from fastfusion.frontend.arch import ComponentAttributes
|
|
52
|
+
attributes = ComponentAttributes(
|
|
53
|
+
value1=123,
|
|
54
|
+
value2="value1 + 5"
|
|
55
|
+
# ... other attributes
|
|
56
|
+
)
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
Supported Arithmetic Operations
|
|
60
|
+
-------------------------------
|
|
61
|
+
|
|
62
|
+
The following are available expressions. In addition to the below, Python keywords that
|
|
63
|
+
are available without import (*e.g.,* ``min``) are also available.
|
|
64
|
+
|
|
65
|
+
- ``ceil``: :py:func:`math.ceil`
|
|
66
|
+
- ``comb``: :py:func:`math.comb`
|
|
67
|
+
- ``copysign``: :py:func:`math.copysign`
|
|
68
|
+
- ``fabs``: :py:func:`math.fabs`
|
|
69
|
+
- ``factorial``: :py:func:`math.factorial`
|
|
70
|
+
- ``floor``: :py:func:`math.floor`
|
|
71
|
+
- ``fmod``: :py:func:`math.fmod`
|
|
72
|
+
- ``frexp``: :py:func:`math.frexp`
|
|
73
|
+
- ``fsum``: :py:func:`math.fsum`
|
|
74
|
+
- ``gcd``: :py:func:`math.gcd`
|
|
75
|
+
- ``isclose``: :py:func:`math.isclose`
|
|
76
|
+
- ``isfinite``: :py:func:`math.isfinite`
|
|
77
|
+
- ``isinf``: :py:func:`math.isinf`
|
|
78
|
+
- ``isnan``: :py:func:`math.isnan`
|
|
79
|
+
- ``isqrt``: :py:func:`math.isqrt`
|
|
80
|
+
- ``ldexp``: :py:func:`math.ldexp`
|
|
81
|
+
- ``modf``: :py:func:`math.modf`
|
|
82
|
+
- ``perm``: :py:func:`math.perm`
|
|
83
|
+
- ``prod``: :py:func:`math.prod`
|
|
84
|
+
- ``remainder``: :py:func:`math.remainder`
|
|
85
|
+
- ``trunc``: :py:func:`math.trunc`
|
|
86
|
+
- ``exp``: :py:func:`math.exp`
|
|
87
|
+
- ``expm1``: :py:func:`math.expm1`
|
|
88
|
+
- ``log``: :py:func:`math.log`
|
|
89
|
+
- ``log1p``: :py:func:`math.log1p`
|
|
90
|
+
- ``log2``: :py:func:`math.log2`
|
|
91
|
+
- ``log10``: :py:func:`math.log10`
|
|
92
|
+
- ``pow``: :py:func:`math.pow`
|
|
93
|
+
- ``sqrt``: :py:func:`math.sqrt`
|
|
94
|
+
- ``acos``: :py:func:`math.acos`
|
|
95
|
+
- ``asin``: :py:func:`math.asin`
|
|
96
|
+
- ``atan``: :py:func:`math.atan`
|
|
97
|
+
- ``atan2``: :py:func:`math.atan2`
|
|
98
|
+
- ``cos``: :py:func:`math.cos`
|
|
99
|
+
- ``dist``: :py:func:`math.dist`
|
|
100
|
+
- ``hypot``: :py:func:`math.hypot`
|
|
101
|
+
- ``sin``: :py:func:`math.sin`
|
|
102
|
+
- ``tan``: :py:func:`math.tan`
|
|
103
|
+
- ``degrees``: :py:func:`math.degrees`
|
|
104
|
+
- ``radians``: :py:func:`math.radians`
|
|
105
|
+
- ``acosh``: :py:func:`math.acosh`
|
|
106
|
+
- ``asinh``: :py:func:`math.asinh`
|
|
107
|
+
- ``atanh``: :py:func:`math.atanh`
|
|
108
|
+
- ``cosh``: :py:func:`math.cosh`
|
|
109
|
+
- ``sinh``: :py:func:`math.sinh`
|
|
110
|
+
- ``tanh``: :py:func:`math.tanh`
|
|
111
|
+
- ``erf``: :py:func:`math.erf`
|
|
112
|
+
- ``erfc``: :py:func:`math.erfc`
|
|
113
|
+
- ``gamma``: :py:func:`math.gamma`
|
|
114
|
+
- ``lgamma``: :py:func:`math.lgamma`
|
|
115
|
+
- ``pi``: :py:data:`math.pi`
|
|
116
|
+
- ``e``: :py:data:`math.e`
|
|
117
|
+
- ``tau``: :py:data:`math.tau`
|
|
118
|
+
- ``inf``: :py:data:`math.inf`
|
|
119
|
+
- ``nan``: :py:data:`math.nan`
|
|
120
|
+
- ``abs``: :py:func:`abs`
|
|
121
|
+
- ``round``: :py:func:`round`
|
|
122
|
+
- ``pow``: :py:func:`pow`
|
|
123
|
+
- ``sum``: :py:func:`sum`
|
|
124
|
+
- ``range``: :py:func:`range`
|
|
125
|
+
- ``len``: :py:func:`len`
|
|
126
|
+
- ``min``: :py:func:`min`
|
|
127
|
+
- ``max``: :py:func:`max`
|
|
128
|
+
- ``float``: :py:func:`float`
|
|
129
|
+
- ``int``: :py:func:`int`
|
|
130
|
+
- ``str``: :py:func:`str`
|
|
131
|
+
- ``bool``: :py:func:`bool`
|
|
132
|
+
- ``list``: :py:func:`list`
|
|
133
|
+
- ``tuple``: :py:func:`tuple`
|
|
134
|
+
- ``enumerate``: :py:func:`enumerate`
|
|
135
|
+
- ``getcwd``: :py:func:`os.getcwd`
|
|
136
|
+
- ``map``: :py:func:`map`
|
|
@@ -0,0 +1,63 @@
|
|
|
1
|
+
.. _set-expressions:
|
|
2
|
+
|
|
3
|
+
Set Expressions
|
|
4
|
+
===============
|
|
5
|
+
|
|
6
|
+
Set expressions are used to describe sets of tensors and rank variables. Set expressions
|
|
7
|
+
are parsed for each pmapping template, meaning that they can reference specific tensors
|
|
8
|
+
for each Einsum.
|
|
9
|
+
|
|
10
|
+
As an example of a set expression, we can describe all tensors that are not intermediates
|
|
11
|
+
using the following:
|
|
12
|
+
|
|
13
|
+
.. code-block:: yaml
|
|
14
|
+
|
|
15
|
+
~Intermediates
|
|
16
|
+
|
|
17
|
+
Set expressions can use the full Python syntax, including the following:
|
|
18
|
+
|
|
19
|
+
- ``&``: Intersection
|
|
20
|
+
- ``|``: Union
|
|
21
|
+
- ``~``: Complement
|
|
22
|
+
- ``-``: Difference
|
|
23
|
+
|
|
24
|
+
You may also use Pythonic language with set expressions in some locations. For example,
|
|
25
|
+
we may want to use only input tensors if there are more than three total tensors, and all tensors otherwise:
|
|
26
|
+
|
|
27
|
+
.. code-block:: yaml
|
|
28
|
+
|
|
29
|
+
Inputs if len(All) > 3 else All
|
|
30
|
+
|
|
31
|
+
Set expressions are parsed for every Einsum + Flattened-Architecture (:ref:`flattening`)
|
|
32
|
+
combination. The following set expressions are supported:
|
|
33
|
+
|
|
34
|
+
- ``All``: All tensors used in the current Einsum.
|
|
35
|
+
- ``Inputs``: Tensors input to the current Einsum.
|
|
36
|
+
- ``Intermediates``: Tensors produced by one Einsum and consumed by another.
|
|
37
|
+
- ``Nothing``: The empty set.
|
|
38
|
+
- ``Outputs``: Tensors output from the current Einsum.
|
|
39
|
+
- ``Persistent``: Tensors that must remain in backing storage for the full duration of
|
|
40
|
+
the workload's execution. See :ref:`persistent-tensors`.
|
|
41
|
+
- ``Shared``: Tensors that are shared between multiple Einsums.
|
|
42
|
+
- ``Tensors``: Alias for ``All``.
|
|
43
|
+
|
|
44
|
+
Additionally, the following special variables are available:
|
|
45
|
+
|
|
46
|
+
- ``<Any Tensor Name>``: Resolves to the tensor with the given name. If the tensor is
|
|
47
|
+
not used in the current Einsum, then it resolves to the empty set.
|
|
48
|
+
- ``Einsum``: The name of the currently-processed Einsum. May be used in expressions
|
|
49
|
+
such as ``Inputs if Einsum == "Conv" else All``.
|
|
50
|
+
- ``EinsumObject``: For complex logic using the Einsum object directly.
|
|
51
|
+
- ``MemoryObject.Tensors``: The set of all tensors that are stored in the memory object.
|
|
52
|
+
Architectures are parsed from the top down, so this will only be available after
|
|
53
|
+
``MemoryObject`` has been parsed. Lower-level memory objects may reference upper-level
|
|
54
|
+
memory objects, but not vice versa.
|
|
55
|
+
|
|
56
|
+
All tensor expressions can be converted into relevant rank variables by accessing
|
|
57
|
+
``.rank_variables``, which will return the set of all rank variables that index into the
|
|
58
|
+
tensor. If multiple tensors are referenced, then the union of all indexing rank
|
|
59
|
+
variables is returned. For example, ``MemoryObject.Tensors.rank_variables`` will return
|
|
60
|
+
the set of all rank variables that index into any of the tensors stored in
|
|
61
|
+
``MemoryObject``.
|
|
62
|
+
|
|
63
|
+
Additional keys can be defined following :ref:`renaming-tensors-rank-variables`.
|
|
@@ -0,0 +1,176 @@
|
|
|
1
|
+
YAML Parsing
|
|
2
|
+
============
|
|
3
|
+
|
|
4
|
+
FastFusion inputs can be parsed from YAML files. YAML parsing occurs once when YAML
|
|
5
|
+
files are loaded into Python.
|
|
6
|
+
|
|
7
|
+
We use an extended version of the standard YAML syntax, including the ``<<`` and ``<<<``
|
|
8
|
+
operators. ``<<``, when used as a dictionary key, will merge the contents of its value
|
|
9
|
+
with the current dictionary. ``<<<`` will merge the contents of its value and will merge
|
|
10
|
+
nested dictionaries. The ``!nomerge`` tag will block merging from occurring.
|
|
11
|
+
|
|
12
|
+
The following is a YAML parsing cheat sheet:
|
|
13
|
+
|
|
14
|
+
.. code-block:: yaml
|
|
15
|
+
|
|
16
|
+
# YAML Nodes
|
|
17
|
+
listNode:
|
|
18
|
+
- element1
|
|
19
|
+
- element2
|
|
20
|
+
|
|
21
|
+
dict_node:
|
|
22
|
+
key1: value1
|
|
23
|
+
key2: value2
|
|
24
|
+
|
|
25
|
+
# Styles
|
|
26
|
+
list_block_style:
|
|
27
|
+
- element1
|
|
28
|
+
- element2
|
|
29
|
+
list_flow_style: [element1, element2]
|
|
30
|
+
|
|
31
|
+
dict_block_style:
|
|
32
|
+
key1: value1
|
|
33
|
+
key2: value2
|
|
34
|
+
dict_flow_style: {key1: value1, key2: value2}
|
|
35
|
+
|
|
36
|
+
# Anchors, Aliases, and Merge Keys
|
|
37
|
+
|
|
38
|
+
# Anchors
|
|
39
|
+
anchored_list_flow_style: &my_anchored_list
|
|
40
|
+
- element1
|
|
41
|
+
- element2
|
|
42
|
+
anchored_list_block_style: &my_anchored_list [1, 2, 3, 4, 5]
|
|
43
|
+
|
|
44
|
+
anchored_dict_flow_style: &my_anchored_dict
|
|
45
|
+
key1: value1
|
|
46
|
+
key2: value2
|
|
47
|
+
anchored_dict_block_style: &my_anchored_dict {key1: value1, key2: value2}
|
|
48
|
+
|
|
49
|
+
# Aliases
|
|
50
|
+
my_list_alias: *my_anchored_list
|
|
51
|
+
result_of_my_list_alias: [1, 2, 3, 4, 5]
|
|
52
|
+
|
|
53
|
+
my_dict_alias: *my_anchored_dict
|
|
54
|
+
result_of_my_dict_alias: {key1: value1, key2: value2}
|
|
55
|
+
|
|
56
|
+
# Merge Keys
|
|
57
|
+
anchored_dict_1: &my_anchored_dict
|
|
58
|
+
key1: value1_dict1
|
|
59
|
+
key2: value2_dict1
|
|
60
|
+
|
|
61
|
+
anchored_dict_2: &my_anchored_dict2
|
|
62
|
+
key2: value2_dict2
|
|
63
|
+
key3: value3_dict2
|
|
64
|
+
|
|
65
|
+
merged_dict:
|
|
66
|
+
<<: [*my_anchored_dict, *my_anchored_dict2] # My_anchored_dict takes precedence
|
|
67
|
+
|
|
68
|
+
result_of_merged_dict:
|
|
69
|
+
key1: value1_dict1
|
|
70
|
+
key2: value2_dict1 # Earlier anchors take precedence
|
|
71
|
+
key3: value3_dict2
|
|
72
|
+
|
|
73
|
+
merged_dict2:
|
|
74
|
+
<<: *my_anchored_dict
|
|
75
|
+
key2: override_value2 # Override key2
|
|
76
|
+
|
|
77
|
+
result_of_merged_dict2:
|
|
78
|
+
key1: value1_dict1
|
|
79
|
+
key2: override_value2
|
|
80
|
+
|
|
81
|
+
# Hierarchical Merge Keys
|
|
82
|
+
anchored_dict_hierarchical_1: &my_anchored_dict
|
|
83
|
+
key1: value1_dict1
|
|
84
|
+
key2: {subkey1: subvalue1, subkey2: subvalue2}
|
|
85
|
+
mylist: [d, e, f]
|
|
86
|
+
mylist_nomerge: [4, 5, 6]
|
|
87
|
+
|
|
88
|
+
merged_dict_hierarchical:
|
|
89
|
+
<<<: *my_anchored_dict
|
|
90
|
+
key2: {subkey1: override1} # subkey2: subvalue2 will come from the merge
|
|
91
|
+
mylist: [a, b, c]
|
|
92
|
+
mylist_nomerge: !nomerge [1, 2, 3]
|
|
93
|
+
|
|
94
|
+
result_of_merged_dict_hierarchical:
|
|
95
|
+
key1: value1_dict1
|
|
96
|
+
key2: {subkey1: override1, subkey2: subvalue2}
|
|
97
|
+
mylist: [a, b, c, d, e, f]
|
|
98
|
+
mylist_nomerge: [1, 2, 3]
|
|
99
|
+
|
|
100
|
+
merged_dict_non_hierarchical:
|
|
101
|
+
<<: *my_anchored_dict
|
|
102
|
+
key2: {subkey1: override1} # This will override all of key2
|
|
103
|
+
mylist: [a, b, c]
|
|
104
|
+
mylist_nomerge: !nomerge [1, 2, 3]
|
|
105
|
+
|
|
106
|
+
result_of_merged_dict_non_hierarchical:
|
|
107
|
+
key1: value1_dict1
|
|
108
|
+
key2: {subkey1: override1}
|
|
109
|
+
mylist: [a, b, c]
|
|
110
|
+
mylist_nomerge: [1, 2, 3]
|
|
111
|
+
|
|
112
|
+
|
|
113
|
+
|
|
114
|
+
Jinja2 Templating
|
|
115
|
+
-----------------
|
|
116
|
+
|
|
117
|
+
We also support Jinja2 templating. To substitute Jinja2 variables, the
|
|
118
|
+
``jinja_parse_data`` argument can be passed to the
|
|
119
|
+
:py:meth:`~fastfusion.util.basetypes.FromYAMLAble.from_yaml` function. Additional Jinja2
|
|
120
|
+
functions are also supported, including:
|
|
121
|
+
|
|
122
|
+
- ``add_to_path(path)``: Add a path to the search path for the ``include`` function.
|
|
123
|
+
|
|
124
|
+
- ``cwd()``: Return the current working directory.
|
|
125
|
+
|
|
126
|
+
- ``find_path(path)``: Find a file in the search path and return the path to the file.
|
|
127
|
+
|
|
128
|
+
- ``include(path, key)``: Include a file and return the value of the key. For example,
|
|
129
|
+
``include(path/x.yaml, a)`` will open the file ``path/x.yaml``, look for a top-level
|
|
130
|
+
dictionary, and return the ``a`` key from that dictionary. Multiple levels of indexing
|
|
131
|
+
can be used, such as ``include(path/x.yaml, a.b.c)``.
|
|
132
|
+
|
|
133
|
+
- ``include_all(path, key)``: Include all files in a directory and return the value of the
|
|
134
|
+
key. For example, ``include_all(path/dir, a)`` will open all files in the directory
|
|
135
|
+
``path/dir``, look for a top-level dictionary, and return the ``a`` key from that dictionary.
|
|
136
|
+
|
|
137
|
+
- ``include_text(path)``: Include a file and return the text of the file.
|
|
138
|
+
|
|
139
|
+
- ``path_exists(path)``: Check if a file exists in the search path.
|
|
140
|
+
|
|
141
|
+
The following is a Jinja2 template cheat sheet:
|
|
142
|
+
|
|
143
|
+
.. code-block:: yaml
|
|
144
|
+
|
|
145
|
+
# Add files to be included in the environment
|
|
146
|
+
{{add_to_path('path/to/some/dir')}}
|
|
147
|
+
{{add_to_path('path/to/some/other/dir')}}
|
|
148
|
+
|
|
149
|
+
variables:
|
|
150
|
+
var1: 5
|
|
151
|
+
var3: "{{cwd()}}/some_file.yaml" # {{cwd()}} is the directory of this file
|
|
152
|
+
var4: "{{find_path('some_file.yaml')}}" # find_path searches all paths added by add_to_path
|
|
153
|
+
var5: {{set_by_jinja}} # Sets the value to a "set_by_jinja" variable that must be defined
|
|
154
|
+
|
|
155
|
+
{% if path_exists('some_file.yaml') %} # Check if a file exists
|
|
156
|
+
var6: "some_file.yaml exists" # Include this line if the file exists
|
|
157
|
+
{% endif %}
|
|
158
|
+
|
|
159
|
+
arch:
|
|
160
|
+
# Include a subset of the file. Index into the structure with
|
|
161
|
+
# dot-separated keys.
|
|
162
|
+
nodes: {{include('other.arch.yaml', 'arch.nodes')}}
|
|
163
|
+
|
|
164
|
+
# Include the entire file
|
|
165
|
+
{{include_text('grab_text_from_file.yaml')}}
|
|
166
|
+
|
|
167
|
+
compound_components:
|
|
168
|
+
# Include the subsets of multiple files. They will be merged into one list.
|
|
169
|
+
classes: {{include_all('compound_components/*.yaml', 'compound_components.classes')}}
|
|
170
|
+
|
|
171
|
+
|
|
172
|
+
{% if enable_text_flag|default(False) %}
|
|
173
|
+
text_included_if_enable_text_flag_is_true: |
|
|
174
|
+
This text will be included if enable_text_flag is true. The |default(False) sets
|
|
175
|
+
the default value of enable_text_flag to False if it is not set.
|
|
176
|
+
{% endif %}
|
|
@@ -0,0 +1,133 @@
|
|
|
1
|
+
Arch Specification
|
|
2
|
+
==================
|
|
3
|
+
|
|
4
|
+
The architecture, defined by the :py:class:`~fastfusion.frontend.arch.Arch` class,
|
|
5
|
+
describes the hardware that is running the workload. An architecture is represented as a
|
|
6
|
+
tree, where branches in the tree represent different compute paths that may be taken.
|
|
7
|
+
For the rest of this section, we will assume that the architecture has been *flattened*,
|
|
8
|
+
meaning that there are no branches in the tree. The flattening procedure is described in
|
|
9
|
+
:ref:`flattening`.
|
|
10
|
+
|
|
11
|
+
A flattened architecture is a hierarchy of components with a
|
|
12
|
+
:py:class:`~fastfusion.frontend.arch.Compute` at the bottom. The following components
|
|
13
|
+
are supported:
|
|
14
|
+
|
|
15
|
+
- :py:class:`~fastfusion.frontend.arch.Memory` components store and reuse data.
|
|
16
|
+
- :py:class:`~fastfusion.frontend.arch.ProcessingStage` components perform some
|
|
17
|
+
non-compute action (*e.g.,* quantizing or transferring data).
|
|
18
|
+
- :py:class:`~fastfusion.frontend.arch.Compute` components perform the Einsum's
|
|
19
|
+
computation.
|
|
20
|
+
|
|
21
|
+
In the architecture file, each component is represented by a YAML dictionary. Component
|
|
22
|
+
types are preceded by the ``!`` character. An example architecture is shown below:
|
|
23
|
+
|
|
24
|
+
.. include:: ../../../../examples/arches/tpu_v4i_like.arch.yaml
|
|
25
|
+
:code: yaml
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
Flattening
|
|
29
|
+
----------
|
|
30
|
+
|
|
31
|
+
A given Einsum may be executed only on a single
|
|
32
|
+
:py:class:`~fastfusion.frontend.arch.Compute`, and it may use hardware objects between
|
|
33
|
+
the root of the tree and the leaf for that
|
|
34
|
+
:py:class:`~fastfusion.frontend.arch.Compute`. Flattening an architecture converts a
|
|
35
|
+
tree architecture into multiple parallel *Flattened-Architectures*, each one
|
|
36
|
+
representing one possible path from the root of the tree to the leaf for that
|
|
37
|
+
:py:class:`~fastfusion.frontend.arch.Compute`.
|
|
38
|
+
|
|
39
|
+
For example, in the architecture above, there are two compute units, the ``scalar_unit``
|
|
40
|
+
and the ``mac``. Flattening this architecture will produce two Flattened-Architectures;
|
|
41
|
+
one with a ``scalar_unit`` and one with a ``mac``. The partial mappings for each of
|
|
42
|
+
these architectures can be combined, and can share hardware that exists above both
|
|
43
|
+
compute units.
|
|
44
|
+
|
|
45
|
+
Inserting a :py:class:`~fastfusion.frontend.arch.Compute` directly into the top-level
|
|
46
|
+
architecture hierarchy will create an optional compute path that goes from the top node
|
|
47
|
+
to the compute. More complex topologies (*e.g.,* giving an upper-level compute a private
|
|
48
|
+
cache) can be created by creating sub-branches following :ref:`sub-branches`.
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
Sub-Branches
|
|
52
|
+
------------
|
|
53
|
+
|
|
54
|
+
.. _sub-branches:
|
|
55
|
+
|
|
56
|
+
Sub-branches in the architecture can represent different execution paths. The following
|
|
57
|
+
branch types are supported:
|
|
58
|
+
|
|
59
|
+
- :py:class:`~fastfusion.frontend.arch.Parallel` represents multiple parallel branches,
|
|
60
|
+
one of which is executed.
|
|
61
|
+
- :py:class:`~fastfusion.frontend.arch.Hierarchical` represents a single hierarchy,
|
|
62
|
+
where each node is a parent of the following nodes.
|
|
63
|
+
|
|
64
|
+
Sub-branches are written with the following syntax:
|
|
65
|
+
|
|
66
|
+
.. code-block:: yaml
|
|
67
|
+
|
|
68
|
+
- !Memory
|
|
69
|
+
...
|
|
70
|
+
|
|
71
|
+
- !Memory
|
|
72
|
+
...
|
|
73
|
+
|
|
74
|
+
- !Parallel
|
|
75
|
+
nodes:
|
|
76
|
+
- !Hierarchical
|
|
77
|
+
nodes:
|
|
78
|
+
- ... # First-branch nodes
|
|
79
|
+
- !Hierarchical
|
|
80
|
+
nodes:
|
|
81
|
+
- ... # Second-branch nodes
|
|
82
|
+
|
|
83
|
+
# If more nodes go down here, they are children of the outer-level node, not the
|
|
84
|
+
# !Parallel node.
|
|
85
|
+
- !Memory
|
|
86
|
+
...
|
|
87
|
+
|
|
88
|
+
The top-level :py:class:`~fastfusion.frontend.arch.Arch` is a
|
|
89
|
+
:py:class:`~fastfusion.frontend.arch.Hierarchical`.
|
|
90
|
+
|
|
91
|
+
|
|
92
|
+
Spatial Fanouts
|
|
93
|
+
---------------
|
|
94
|
+
|
|
95
|
+
Spatial fanouts describe the spatial organization of components in the architecture. Any
|
|
96
|
+
component may have spatial fanouts, and fanouts are allowed in any dimension. For
|
|
97
|
+
example, in the architecture above, the ``LocalBuffer`` component has a size-4 spatial
|
|
98
|
+
fanout in the ``Z`` dimension, meaning that there are 4 instances of the component. All
|
|
99
|
+
child components are duplicated in the ``Z`` dimension as well.
|
|
100
|
+
|
|
101
|
+
The ``ArrayFanout`` component also has a spatial fanout in two dimensions, the
|
|
102
|
+
``reuse_input`` and ``reuse_output`` dimensions.
|
|
103
|
+
:py:class:`~fastfusion.frontend.arch.Fanout` components can be used to instantiate
|
|
104
|
+
spatial fanouts.
|
|
105
|
+
|
|
106
|
+
Reuse in spatial dimensions may be controlled with the ``may_reuse`` keyword, which
|
|
107
|
+
takes in a set expression that is parsed according to :ref:`set-expressions`. In the
|
|
108
|
+
example, nothing is reused spatially between ``LocalBuffer`` instances, while inputs
|
|
109
|
+
and outputs are reused across registers in the ``reuse_input`` and ``reuse_output``
|
|
110
|
+
dimensions, respectively. Additionally, the ``must_reuse`` keyword can be used to force
|
|
111
|
+
reuse; for example, ``must_reuse: input`` means that all spatial instances must use the
|
|
112
|
+
same input values, else the mapping will be invalid.
|
|
113
|
+
|
|
114
|
+
Spatial fanouts support the following keywords:
|
|
115
|
+
|
|
116
|
+
.. include-attrs:: fastfusion.frontend.arch.Spatial
|
|
117
|
+
|
|
118
|
+
Tensor Holders
|
|
119
|
+
--------------
|
|
120
|
+
|
|
121
|
+
Tensor holders, which include :py:class:`~fastfusion.frontend.arch.Memory` and
|
|
122
|
+
:py:class:`~fastfusion.frontend.arch.Fanout` components, hold tensors. Each of them
|
|
123
|
+
supports extra attributes in its ``attributes`` field, so check
|
|
124
|
+
:py:class:`~fastfusion.frontend.arch.MemoryAttributes` and
|
|
125
|
+
:py:class:`~fastfusion.frontend.arch.FanoutAttributes` for more information on the
|
|
126
|
+
attributes that they support.
|
|
127
|
+
|
|
128
|
+
Additionally, they have a ``tensors`` field, which is used to define the
|
|
129
|
+
tensors that are held by the component. They are represented by the
|
|
130
|
+
:py:class:`~fastfusion.frontend.constraints.Tensors` class, which supports the following
|
|
131
|
+
fields:
|
|
132
|
+
|
|
133
|
+
.. include-attrs:: fastfusion.frontend.constraints.Tensors
|
|
@@ -0,0 +1,83 @@
|
|
|
1
|
+
.. _specifying-workload:
|
|
2
|
+
|
|
3
|
+
Workload and Renames Specification
|
|
4
|
+
==================================
|
|
5
|
+
|
|
6
|
+
The :py:class:`~fastfusion.frontend.workload` object describes a cascade of
|
|
7
|
+
Einsums. An Einsum, described in ..., can represent a variety of tensor algebra kernels,
|
|
8
|
+
and a cascade of Einsums is a list of Einsums with data dependencies.
|
|
9
|
+
|
|
10
|
+
The following is an example workload for three back-to-back matrix multiplications:
|
|
11
|
+
|
|
12
|
+
.. include:: ../../../../examples/workloads/three_matmuls.workload.yaml
|
|
13
|
+
:code: yaml
|
|
14
|
+
|
|
15
|
+
The top-level Workload spec has the following attributes:
|
|
16
|
+
|
|
17
|
+
.. include-attrs:: fastfusion.frontend.workload
|
|
18
|
+
|
|
19
|
+
Each Einsum in the workload represents a single Einsum with the following attributes:
|
|
20
|
+
|
|
21
|
+
.. include-attrs:: fastfusion.frontend.workload.Einsum
|
|
22
|
+
|
|
23
|
+
And each tensor access has the following attributes:
|
|
24
|
+
|
|
25
|
+
.. include-attrs:: fastfusion.frontend.workload.TensorAccess
|
|
26
|
+
|
|
27
|
+
Workloads include *ranks* and *rank variables*. Ranks are the dimensions of the tensors
|
|
28
|
+
in the Einsum, while rank variables are variables that index into these ranks. Generally
|
|
29
|
+
the rank names are uppercased versions of the rank variable names, but not always. In
|
|
30
|
+
more-complex workloads (such as the GPT example later in this doc), there may be cases
|
|
31
|
+
where we index into a rank with multiple different rank variables-- in this case, we may
|
|
32
|
+
use a projection dictionary instead of a list.
|
|
33
|
+
|
|
34
|
+
.. code-block:: yaml
|
|
35
|
+
|
|
36
|
+
- name: Matmul0
|
|
37
|
+
tensor_accesses:
|
|
38
|
+
- {name: T0, projection: [m, n0]} # Implies projection: {M: m, N0: n0}
|
|
39
|
+
- {name: W0, projection: [n0, n1]} # Implies projection: {N0: n0, N1: n1}
|
|
40
|
+
- {name: T1, projection: [m, n1], output: True} # Implies projection: {M: m, N1: n1}
|
|
41
|
+
|
|
42
|
+
- name: Matmul1
|
|
43
|
+
tensor_accesses:
|
|
44
|
+
# We can be explicit about the projection
|
|
45
|
+
- {name: T1, projection: {M: m, N1: n1}}
|
|
46
|
+
- {name: W1, projection: {N1: n1, N2: n2}}
|
|
47
|
+
- {name: T2, projection: {M: m, N2: n2}, output: True}
|
|
48
|
+
|
|
49
|
+
Renaming Tensors and Rank Variables
|
|
50
|
+
-----------------------------------
|
|
51
|
+
.. _renaming-tensors-rank-variables:
|
|
52
|
+
|
|
53
|
+
Renames allow us to write simple, generic names (*e.g.,* ``input``,
|
|
54
|
+
``reduced_rank_variable``) in our set expressions and have them resolve to tensors or
|
|
55
|
+
rank variables in the Einsum.
|
|
56
|
+
|
|
57
|
+
Each Einsum object has a ``renames`` attribute. This attribute may be populated with one
|
|
58
|
+
of the following:
|
|
59
|
+
|
|
60
|
+
- A dictionary of ``{new_name: source_set_expression}`` expressions, where
|
|
61
|
+
``source_set_expression`` may resolve either to tensors or rank variables. This is the
|
|
62
|
+
simplest method.
|
|
63
|
+
- A list of dictionaries, each one having the structure ``{name: new_name, source:
|
|
64
|
+
source_set_expression, expected_count: 1}``. This method allows you to write an
|
|
65
|
+
expected count, which is optional, and checks that your set expression returned the
|
|
66
|
+
expected number of elements. For example, if your source set expression were
|
|
67
|
+
``Outputs()``, an expected count of 1 would pass if there were only one output tensor,
|
|
68
|
+
but fail if there were two.
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
Additionally, you may define a separate top-level
|
|
72
|
+
:py:class:`~fastfusion.frontend.renames.Renames` object with structure mirroring the
|
|
73
|
+
workload. For example, one is at the bottom of the following workload:
|
|
74
|
+
|
|
75
|
+
.. include:: ../../../../examples/workloads/gpt3_6.7B.workload.yaml
|
|
76
|
+
:code: yaml
|
|
77
|
+
|
|
78
|
+
This renames format includes, for every Einsum, a ``tensor_accesses`` key and a
|
|
79
|
+
``rank_variables`` key. Both support the above dictionary or list-of-dictionary rename
|
|
80
|
+
formats.
|
|
81
|
+
|
|
82
|
+
If an Einsum in the renames is named ``default``, then its renames are applied to every
|
|
83
|
+
Einsum unless overridden.
|
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
Input Specifications
|
|
2
|
+
====================
|
|
3
|
+
|
|
4
|
+
The :py:class:`~fastfusion.frontend.spec.Spec` class is the main class that contains all
|
|
5
|
+
inputs to this framework. It includes the following:
|
|
6
|
+
|
|
7
|
+
.. include-attrs:: fastfusion.frontend.spec.Spec
|
|
8
|
+
|
|
9
|
+
Some of the Spec's inputs are described in the following sections:
|
|
10
|
+
|
|
11
|
+
.. toctree::
|
|
12
|
+
:maxdepth: 1
|
|
13
|
+
|
|
14
|
+
spec/architecture
|
|
15
|
+
spec/mapping
|
|
16
|
+
spec/workload
|
|
17
|
+
|
|
18
|
+
Input Parsing
|
|
19
|
+
-------------
|
|
20
|
+
|
|
21
|
+
Input specifications can include arithmetic expressions and set expressions. The parsing
|
|
22
|
+
is described in the following:
|
|
23
|
+
|
|
24
|
+
.. toctree::
|
|
25
|
+
:maxdepth: 1
|
|
26
|
+
|
|
27
|
+
parsing/arithmetic_parsing
|
|
28
|
+
parsing/setexpressions
|
|
29
|
+
|
|
30
|
+
Additionally, inputs can be specified with YAML files using an extended YAML syntax, which
|
|
31
|
+
is described in the following:
|
|
32
|
+
|
|
33
|
+
.. toctree::
|
|
34
|
+
:maxdepth: 1
|
|
35
|
+
|
|
36
|
+
parsing/yaml_parsing
|