accelforge 0.0.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- accelforge/__init__.py +21 -0
- accelforge/_accelerated_imports.py +16 -0
- accelforge/_deprecate/_simanneal/evalmapping.py +271 -0
- accelforge/_deprecate/_simanneal/mapspaceglobals.py +298 -0
- accelforge/_deprecate/_simanneal/simanneal.py +666 -0
- accelforge/_deprecate/_simanneal/tracking.py +105 -0
- accelforge/_deprecate/_simanneal/wrappers.py +218 -0
- accelforge/_deprecate/_simanneal2/__init__.py +7 -0
- accelforge/_deprecate/_simanneal2/simanneal.py +493 -0
- accelforge/_deprecate/_simanneal2/tracking.py +116 -0
- accelforge/_deprecate/compatibility_util.py +181 -0
- accelforge/_deprecate/layerdeduplication/__init__.py +2 -0
- accelforge/_deprecate/layerdeduplication/group_similar_einsums.py +160 -0
- accelforge/_deprecate/layerdeduplication/grouped_einsums.py +84 -0
- accelforge/_deprecate/mapping_filter_tags/__init__.py +2 -0
- accelforge/_deprecate/mapping_filter_tags/ffmt.py +212 -0
- accelforge/_deprecate/mapping_filter_tags/onesplit.py +24 -0
- accelforge/_deprecate/mapping_filter_tags/util.py +24 -0
- accelforge/_deprecate/tags.py +69 -0
- accelforge/_deprecate/viz/__init__.py +0 -0
- accelforge/_deprecate/viz/interactive.py +159 -0
- accelforge/_deprecate/viz/reservationtree.py +307 -0
- accelforge/_deprecate/viz/ski_slope.py +88 -0
- accelforge/_version.py +15 -0
- accelforge/examples.py +39 -0
- accelforge/frontend/__init__.py +10 -0
- accelforge/frontend/_binding.py +129 -0
- accelforge/frontend/_workload_isl/__init__.py +2 -0
- accelforge/frontend/_workload_isl/_isl.py +149 -0
- accelforge/frontend/_workload_isl/_symbolic.py +141 -0
- accelforge/frontend/arch copy.py +1544 -0
- accelforge/frontend/arch.py +1642 -0
- accelforge/frontend/config.py +63 -0
- accelforge/frontend/mapper/__init__.py +5 -0
- accelforge/frontend/mapper/ffm.py +126 -0
- accelforge/frontend/mapper/mapper.py +7 -0
- accelforge/frontend/mapper/metrics.py +30 -0
- accelforge/frontend/mapping/__init__.py +1 -0
- accelforge/frontend/mapping/mapping.py +1736 -0
- accelforge/frontend/model.py +14 -0
- accelforge/frontend/renames.py +150 -0
- accelforge/frontend/spec copy.py +230 -0
- accelforge/frontend/spec.py +301 -0
- accelforge/frontend/variables.py +12 -0
- accelforge/frontend/workload.py +952 -0
- accelforge/mapper/FFM/__init__.py +9 -0
- accelforge/mapper/FFM/_join_pmappings/__init__.py +0 -0
- accelforge/mapper/FFM/_join_pmappings/compatibility.py +653 -0
- accelforge/mapper/FFM/_join_pmappings/compress_pmappings.py +140 -0
- accelforge/mapper/FFM/_join_pmappings/join_pmappings.py +703 -0
- accelforge/mapper/FFM/_join_pmappings/pmapping_dataframe.py +901 -0
- accelforge/mapper/FFM/_join_pmappings/pmapping_group.py +337 -0
- accelforge/mapper/FFM/_make_pmappings/contraints/__init__.py +0 -0
- accelforge/mapper/FFM/_make_pmappings/contraints/constraints.py +360 -0
- accelforge/mapper/FFM/_make_pmappings/make_pmapping_templates/__init__.py +1 -0
- accelforge/mapper/FFM/_make_pmappings/make_pmapping_templates/make_loops.py +373 -0
- accelforge/mapper/FFM/_make_pmappings/make_pmapping_templates/make_pmapping_templates.py +463 -0
- accelforge/mapper/FFM/_make_pmappings/make_pmapping_templates/make_reservations.py +95 -0
- accelforge/mapper/FFM/_make_pmappings/make_pmapping_templates/make_storage_order.py +382 -0
- accelforge/mapper/FFM/_make_pmappings/make_pmapping_templates/make_storages.py +155 -0
- accelforge/mapper/FFM/_make_pmappings/make_pmappings.py +411 -0
- accelforge/mapper/FFM/_make_pmappings/make_pmappings_from_templates/__init__.py +1 -0
- accelforge/mapper/FFM/_make_pmappings/make_pmappings_from_templates/make_pmappings_from_templates.py +407 -0
- accelforge/mapper/FFM/_make_pmappings/make_pmappings_from_templates/make_tile_shapes.py +1681 -0
- accelforge/mapper/FFM/_make_pmappings/make_pmappings_from_templates/run_model.py +170 -0
- accelforge/mapper/FFM/_make_pmappings/make_pmappings_from_templates/symbol_relations.py +174 -0
- accelforge/mapper/FFM/_make_pmappings/pmapper_job.py +282 -0
- accelforge/mapper/FFM/_pareto_df/df_convention.py +273 -0
- accelforge/mapper/FFM/_pareto_df/pareto copy.py +836 -0
- accelforge/mapper/FFM/_pareto_df/pareto.py +508 -0
- accelforge/mapper/FFM/data.py +61 -0
- accelforge/mapper/FFM/main copy.py +236 -0
- accelforge/mapper/FFM/main.py +208 -0
- accelforge/mapper/FFM/mappings.py +510 -0
- accelforge/mapper/FFM/pmappings.py +310 -0
- accelforge/mapper/__init__.py +4 -0
- accelforge/mapper.py +0 -0
- accelforge/model/__init__.py +1 -0
- accelforge/model/_looptree/__init__.py +0 -0
- accelforge/model/_looptree/accesses.py +335 -0
- accelforge/model/_looptree/capacity/__init__.py +1 -0
- accelforge/model/_looptree/capacity/aggregators.py +36 -0
- accelforge/model/_looptree/capacity/capacity.py +47 -0
- accelforge/model/_looptree/energy.py +150 -0
- accelforge/model/_looptree/equivalent_ranks.py +29 -0
- accelforge/model/_looptree/latency/__init__.py +1 -0
- accelforge/model/_looptree/latency/latency.py +98 -0
- accelforge/model/_looptree/latency/memory.py +120 -0
- accelforge/model/_looptree/latency/processors.py +92 -0
- accelforge/model/_looptree/mapping_utilities.py +71 -0
- accelforge/model/_looptree/reuse/__init__.py +4 -0
- accelforge/model/_looptree/reuse/isl/__init__.py +1 -0
- accelforge/model/_looptree/reuse/isl/des.py +59 -0
- accelforge/model/_looptree/reuse/isl/isl_functions.py +374 -0
- accelforge/model/_looptree/reuse/isl/mapping_to_isl/__init__.py +4 -0
- accelforge/model/_looptree/reuse/isl/mapping_to_isl/analyze_mapping.py +297 -0
- accelforge/model/_looptree/reuse/isl/mapping_to_isl/skews_from_mapping.py +236 -0
- accelforge/model/_looptree/reuse/isl/mapping_to_isl/tiling.py +685 -0
- accelforge/model/_looptree/reuse/isl/mapping_to_isl/types.py +188 -0
- accelforge/model/_looptree/reuse/isl/spatial.py +260 -0
- accelforge/model/_looptree/reuse/isl/temporal.py +182 -0
- accelforge/model/_looptree/reuse/symbolic/__init__.py +1 -0
- accelforge/model/_looptree/reuse/symbolic/symbolic copy 2.py +1346 -0
- accelforge/model/_looptree/reuse/symbolic/symbolic copy.py +1408 -0
- accelforge/model/_looptree/reuse/symbolic/symbolic.py +1396 -0
- accelforge/model/_looptree/run.py +122 -0
- accelforge/model/_looptree/types.py +26 -0
- accelforge/model/_looptree/visualization/__init__.py +0 -0
- accelforge/model/_looptree/visualization/occupancy.py +11 -0
- accelforge/model/main.py +222 -0
- accelforge/plotting/__init__.py +2 -0
- accelforge/plotting/mappings.py +219 -0
- accelforge/plotting/specs.py +57 -0
- accelforge/util/__init__.py +4 -0
- accelforge/util/_base_analysis_types.py +24 -0
- accelforge/util/_basetypes.py +1089 -0
- accelforge/util/_frozenset.py +36 -0
- accelforge/util/_isl.py +29 -0
- accelforge/util/_itertools.py +14 -0
- accelforge/util/_mathfuncs.py +57 -0
- accelforge/util/_parse_expressions.py +339 -0
- accelforge/util/_picklecache.py +32 -0
- accelforge/util/_setexpressions.py +268 -0
- accelforge/util/_sympy/__init__.py +0 -0
- accelforge/util/_sympy/broadcast_max.py +18 -0
- accelforge/util/_visualization.py +112 -0
- accelforge/util/_yaml.py +579 -0
- accelforge/util/parallel.py +193 -0
- accelforge-0.0.1.dist-info/METADATA +64 -0
- accelforge-0.0.1.dist-info/RECORD +258 -0
- accelforge-0.0.1.dist-info/WHEEL +5 -0
- accelforge-0.0.1.dist-info/licenses/LICENSE +19 -0
- accelforge-0.0.1.dist-info/top_level.txt +5 -0
- docs/_build/html/_sources/fastfusion.frontend.mapper.rst.txt +37 -0
- docs/_build/html/_sources/fastfusion.frontend.rst.txt +70 -0
- docs/_build/html/_sources/fastfusion.frontend.workload.rst.txt +21 -0
- docs/_build/html/_sources/fastfusion.mapper.FFM.rst.txt +37 -0
- docs/_build/html/_sources/fastfusion.mapper.rst.txt +18 -0
- docs/_build/html/_sources/fastfusion.rst.txt +20 -0
- docs/_build/html/_sources/fastfusion.util.rst.txt +21 -0
- docs/_build/html/_sources/index.rst.txt +87 -0
- docs/_build/html/_sources/modules.rst.txt +7 -0
- docs/_build/html/_sources/notes/citation.rst.txt +45 -0
- docs/_build/html/_sources/notes/definitions.rst.txt +43 -0
- docs/_build/html/_sources/notes/faqs.rst.txt +39 -0
- docs/_build/html/_sources/notes/modeling/accelerator_energy_latency.rst.txt +72 -0
- docs/_build/html/_sources/notes/modeling/component_energy_area.rst.txt +96 -0
- docs/_build/html/_sources/notes/modeling/mapping.rst.txt +100 -0
- docs/_build/html/_sources/notes/modeling.rst.txt +33 -0
- docs/_build/html/_sources/notes/parsing/arithmetic_parsing.rst.txt +136 -0
- docs/_build/html/_sources/notes/parsing/setexpressions.rst.txt +63 -0
- docs/_build/html/_sources/notes/parsing/yaml_parsing.rst.txt +176 -0
- docs/_build/html/_sources/notes/quickstart_and_installation.rst.txt +9 -0
- docs/_build/html/_sources/notes/spec/architecture.rst.txt +133 -0
- docs/_build/html/_sources/notes/spec/mapping.rst.txt +12 -0
- docs/_build/html/_sources/notes/spec/workload.rst.txt +83 -0
- docs/_build/html/_sources/notes/spec.rst.txt +36 -0
- docs/source/_ext/include_attrs.py +213 -0
- docs/source/_ext/include_docstring.py +364 -0
- docs/source/_ext/include_functions.py +154 -0
- docs/source/_ext/include_notebook.py +131 -0
- docs/source/_ext/include_yaml.py +119 -0
- docs/source/_ext/inherited_attributes.py +222 -0
- docs/source/_ext/paths.py +4 -0
- docs/source/conf.py +79 -0
- examples/arches/compute_in_memory/_include.yaml +74 -0
- examples/arches/compute_in_memory/_include_functions.py +229 -0
- examples/arches/compute_in_memory/_load_spec.py +57 -0
- examples/arches/compute_in_memory/components/c2c_multiplier.py +181 -0
- examples/arches/compute_in_memory/components/dac_c2c_r2r.py +605 -0
- examples/arches/compute_in_memory/components/misc.py +195 -0
- examples/arches/compute_in_memory/components/util/bit_functions.py +51 -0
- examples/arches/compute_in_memory/components/zero_comparator.py +92 -0
- examples/arches/compute_in_memory/isaac.yaml +233 -0
- examples/arches/compute_in_memory/memory_cells/ecram_demo.yaml +63 -0
- examples/arches/compute_in_memory/memory_cells/rram_example.yaml +63 -0
- examples/arches/compute_in_memory/memory_cells/rram_isaac_isca_2016.yaml +64 -0
- examples/arches/compute_in_memory/memory_cells/rram_neurosim_default.yaml +63 -0
- examples/arches/compute_in_memory/memory_cells/rram_raella_isca_2023.yaml +70 -0
- examples/arches/compute_in_memory/memory_cells/rram_wan_nature_2022.yaml +63 -0
- examples/arches/compute_in_memory/memory_cells/sram_colonnade_jssc_2021.yaml +63 -0
- examples/arches/compute_in_memory/memory_cells/sram_example.yaml +63 -0
- examples/arches/compute_in_memory/memory_cells/sram_jia_jssc_2020.yaml +63 -0
- examples/arches/compute_in_memory/memory_cells/sram_sinangil_jssc_2021.yaml +63 -0
- examples/arches/compute_in_memory/memory_cells/sram_wang_vlsi_2022.yaml +63 -0
- examples/arches/compute_in_memory/wang_vlsi_2022.yaml +289 -0
- examples/arches/eyeriss.yaml +68 -0
- examples/arches/fanout_variations/at_glb.yaml +31 -0
- examples/arches/fanout_variations/at_glb_with_fanout_node.yaml +34 -0
- examples/arches/fanout_variations/at_mac.yaml +31 -0
- examples/arches/fanout_variations/at_mac_with_constraints.yaml +38 -0
- examples/arches/fanout_variations/at_mac_with_fanout_node.yaml +34 -0
- examples/arches/nvdla.yaml +47 -0
- examples/arches/simple.yaml +28 -0
- examples/arches/tpu_v4i.yaml +67 -0
- examples/mappings/unfused_matmuls_to_simple.yaml +33 -0
- examples/misc/component_annotated.yaml +33 -0
- examples/workloads/gpt3_6.7B.yaml +124 -0
- examples/workloads/matmuls.yaml +20 -0
- examples/workloads/mobilenet_28.yaml +81 -0
- examples/workloads/mobilenet_various_separate.yaml +106 -0
- examples/workloads/three_matmuls_annotated.yaml +59 -0
- notebooks/.ipynb_checkpoints/fastfusion_arch_study_michael-checkpoint.ipynb +359 -0
- notebooks/compute_in_memory/_scripts.py +339 -0
- notebooks/compute_in_memory/isaac.guide.ipynb +270 -0
- notebooks/compute_in_memory/wang_vlsi_2022.ipynb +602 -0
- notebooks/paths.py +4 -0
- notebooks/tutorials/.ipynb_checkpoints/1_FFM-checkpoint.ipynb +3110 -0
- notebooks/tutorials/FFM.ipynb +3498 -0
- notebooks/tutorials/_include.py +48 -0
- notebooks/tutorials/component_energy_area.ipynb +363 -0
- tests/Q_mapping.yaml +38 -0
- tests/__init__.py +0 -0
- tests/conv.mapping.yaml +27 -0
- tests/conv.workload.yaml +13 -0
- tests/conv_sym.mapping.yaml +43 -0
- tests/copy.mapping.yaml +35 -0
- tests/copy.workload.yaml +15 -0
- tests/distribuffers/__init__.py +0 -0
- tests/distribuffers/multicast/test_cases.yaml +482 -0
- tests/distribuffers/spec/binding/valid_bindings.yaml +97 -0
- tests/distribuffers/spec/distributed.yaml +100 -0
- tests/distribuffers/spec/logical_arch.yaml +32 -0
- tests/distribuffers/spec/physical_arch.yaml +69 -0
- tests/distribuffers/test_binding.py +48 -0
- tests/frontend/__init__.py +0 -0
- tests/frontend/test_mapping_viz.py +52 -0
- tests/mapper/__init__.py +0 -0
- tests/mapper/configs/conv1d/conv1d.mapping.yaml +31 -0
- tests/mapper/configs/conv1d/conv1d.workload.yaml +11 -0
- tests/mapper/configs/two_conv1d/two_conv1d.expected.yaml +38 -0
- tests/mapper/configs/two_conv1d/two_conv1d.mapping.yaml +54 -0
- tests/mapper/configs/two_conv1d/two_conv1d.workload.yaml +19 -0
- tests/mapper/test_mapping_to_isl.py +90 -0
- tests/mapper/test_spatial_reuse_analysis.py +67 -0
- tests/mapper/test_temporal_reuse_analysis.py +56 -0
- tests/mapper/util.py +58 -0
- tests/matmul.mapping.yaml +29 -0
- tests/matmul.workload.yaml +12 -0
- tests/matmul_spatial.mapping.yaml +44 -0
- tests/mha.renames.yaml +65 -0
- tests/mha.workload.yaml +67 -0
- tests/mha.yaml +59 -0
- tests/mha_full.workload.yaml +67 -0
- tests/mobilenet.workload.yaml +35 -0
- tests/mobilenet_long.workload.yaml +64 -0
- tests/pmappingcache.py +24 -0
- tests/processing_stage.arch.yaml +40 -0
- tests/snowcat.arch.yaml +36 -0
- tests/test_ffm_join_pmappings.py +106 -0
- tests/test_ffm_make_pmappings.py +82 -0
- tests/test_ffm_make_tile_shapes.py +49 -0
- tests/test_mapper.py +100 -0
- tests/test_model.py +37 -0
- tests/test_plotting.py +72 -0
- tests/test_processing_stage.py +46 -0
- tests/test_symbolic_model.py +248 -0
- tests/test_workload.py +141 -0
|
@@ -0,0 +1,63 @@
|
|
|
1
|
+
from typing import Annotated, Callable, Optional
|
|
2
|
+
|
|
3
|
+
from pydantic import ConfigDict
|
|
4
|
+
from hwcomponents import ComponentModel
|
|
5
|
+
from accelforge.util._basetypes import ParsableDict, ParsableList, ParsableModel
|
|
6
|
+
from accelforge._version import assert_version, __version__
|
|
7
|
+
from platformdirs import user_config_dir
|
|
8
|
+
import logging
|
|
9
|
+
import os
|
|
10
|
+
import sys
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
# Name of the environment variable that, when set, gives the path of the config file
# that get_config() uses instead of the default locations.
USER_CUSTOM_CONFIG_PATH_VAR = "ACCELFORGE_CONFIG_PATH"
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def get_config() -> "Config":
    """
    Locate the configuration file, create it with defaults if missing, and load it.

    The path is chosen in priority order:

    1. The value of the environment variable named by
       ``USER_CUSTOM_CONFIG_PATH_VAR``, if that variable is set.
    2. ``<sys.prefix>/accelforge/config.yaml`` when the interpreter is running inside
       a virtual environment (``sys.real_prefix`` exists, or ``sys.base_prefix``
       differs from ``sys.prefix``).
    3. ``config.yaml`` inside ``platformdirs.user_config_dir("accelforge")``.

    If nothing exists at the chosen path, a default :class:`Config` is written there
    (creating parent directories as needed) before loading.

    Returns:
        The :class:`Config` parsed from the chosen file.
    """
    if USER_CUSTOM_CONFIG_PATH_VAR in os.environ:
        f = os.environ[USER_CUSTOM_CONFIG_PATH_VAR]
    elif hasattr(sys, "real_prefix") or (
        hasattr(sys, "base_prefix") and sys.base_prefix != sys.prefix
    ):
        f = os.path.join(sys.prefix, "accelforge", "config.yaml")
    else:
        f = os.path.join(user_config_dir("accelforge"), "config.yaml")

    if not os.path.exists(f):
        logging.warning(f"No configuration file found. Creating config file at {f}.")
        # A bare file name (e.g. "config.yaml" from the environment variable) has an
        # empty dirname, and os.makedirs("") raises FileNotFoundError; in that case the
        # file goes in the current directory and there is nothing to create.
        parent_dir = os.path.dirname(f)
        if parent_dir:
            os.makedirs(parent_dir, exist_ok=True)
        config = Config()
        config.to_yaml(f)

    logging.warning(f"Loading configuration file from {f}")
    return Config.from_yaml(f)
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
class Config(ParsableModel):
    """
    User-level configuration, loaded from a YAML file.

    Holds the custom expression functions, the ``hwcomponents`` models to use, and
    whether installed ``hwcomponents`` models should be discovered automatically.
    """

    # version: Annotated[str, assert_version] = __version__

    expression_custom_functions: ParsableList[str | Callable] = ParsableList()
    """
    A list of functions to use while parsing expressions. These can either be functions
    or paths to Python files that contain the functions. If a path is provided, then all
    functions in the file will be added to the parser.
    """
    component_models: ParsableList[str | ComponentModel] = ParsableList()
    """
    A list of hwcomponents models to use for the energy and area calculations. These can
    either be paths to Python files that contain the models, or `hwcomponents`
    :py:class:`~hwcomponents.ComponentModel` objects.
    """
    use_installed_component_models: Optional[bool] = True
    """
    If True, then the `hwcomponents` library will find all installed models. If False,
    then only the models specified in `component_models` will be used.
    """
    # Needed so that non-pydantic types (Callable, ComponentModel) can be field types.
    model_config = ConfigDict(arbitrary_types_allowed=True)

    @classmethod
    def from_yaml(cls, f: str) -> "Config":
        """
        Build a :class:`Config` from the YAML file at path ``f``.

        The top-level YAML mapping is passed to the constructor as keyword arguments.
        An empty document yields a config with all defaults.
        """
        # Imported here rather than at module level, as in the original code
        # (presumably to avoid a circular import -- TODO confirm).
        from accelforge.util import _yaml

        data = _yaml.load_yaml(f)
        # An empty YAML document commonly loads as None; fall back to all defaults
        # instead of failing on ``cls(**None)`` with an opaque TypeError.
        if data is None:
            data = {}
        return cls(**data)
|
|
@@ -0,0 +1,126 @@
|
|
|
1
|
+
from typing import Any, Annotated, Literal
|
|
2
|
+
|
|
3
|
+
from accelforge.frontend.mapper.metrics import Metrics
|
|
4
|
+
from accelforge.util._basetypes import ParsableModel
|
|
5
|
+
from accelforge._version import assert_version, __version__
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class FFM(ParsableModel):
    """Configuration for the Fast and Fusiest Mapper."""

    # version: Annotated[str, assert_version] = __version__
    # """ Version """

    metrics: Metrics = Metrics.ENERGY
    """ Metrics used to optimize mappings. """

    info_metrics: Metrics = Metrics.all_metrics()
    """Metrics to be reported for final mappings."""

    _timeloop_style_even: bool = False
    """ Timeloop-style even mappings must have, for each memory, at most two
    locations where storage nodes may be placed. """

    force_memory_hierarchy_order: bool = True
    """
    If set to true, storage nodes for lower-level memories must be placed below storage
    nodes for higher-level memories. For example, all MainMemory storage nodes must go
    above all LocalBuffer storage nodes.

    This constraint always applies to same-tensor storage nodes (e.g., MainMemory
    reusing Output must go above LocalBuffer reusing Output); turning it off will
    permit things like MainMemory reusing Output going above LocalBuffer reusing
    Input.
    """

    out_of_order_hierarchy_explore_removing_spatials_for_more_temporals: bool = False
    """
    If force_memory_hierarchy_order is set to False or is set to False for any
    particular component, and a spatial fanout ends up being raised above a storage node
    that does not have that fanout, then there may be cases where a spatial loop is put
    above a component that does not have the associated fanout.

    When this happens, we may not put between the spatial and the storage node any
    temporal loops that affect the same indexing expressions as the spatial loops.

    For example, the following is not allowed:

    Arch:

    - Global Buffer
    - 2x fanout
    - Register

    Mapping:

    spatial-for-reg n in [0, 10):
      [Register reuses input]
      for n in [0, 2):
        [Global Buffer reuses output]

    By default, if there are spatial loops that are not constrained away, then the
    mapper will not explore putting any temporal loops that conflict. In the above
    example, it will never place the above temporal loop. If this is set to True, then
    the mapper will explore removing the spatial loop in order to allow for the temporal
    loop to be placed. In the above example, it will explore removing the spatial loop
    in order to allow for the temporal loop to be placed.
    """

    max_fused_loops_per_rank_variable: int = 1
    """ The maximum number of fused loops in a pmapping for a given rank variable. """

    max_fused_loops: float | int = float("inf")
    """ The maximum total number of fused loops in a pmapping. """

    max_loops: float | int = float("inf")
    """ The maximum total loops in a pmapping. """

    max_loops_minus_ranks: float | int = float("inf")
    """
    The maximum total loops in a pmapping minus the number of ranks. For example,
    3 means that the number of loops can be up to (the number of ranks + 3).
    """

    _can_lower_outermost_memory: bool = False
    """
    Whether the storage node of outermost memory can be lowered. If set to True, the
    mapper may exchange tiles of tensors via the outermost memory, instead of storing
    full tensors. Set this to True to explore reducing outermost memory usage.

    TODO: Also need to explore putting loops above the outermost memory then. This is
    currently private because we may want to have a catch-all term like
    "save_outermost_memory_usage".
    """

    _only_output_pmapping_index: int | None = None
    """
    For debugging. Only output the pmapping with this index.
    """

    memory_limit: float | int = float("inf")
    """ The maximum memory limit for the mapper. """

    memory_limit_per_process: float | int = float("inf")
    """ The maximum memory limit per process for one of the mapper's processes. """

    time_limit: float | int = float("inf")
    """ The maximum time limit for the mapper. """

    time_limit_per_pmapping_template: float | int = float("inf")
    """ The maximum time limit per pmapping template. """

    max_pmapping_templates_per_einsum: float | int = float("inf")
    """
    The maximum number of pmapping templates per Einsum. Once this many templates are
    generated, the mapper will stop generating more. This is useful for debugging (why
    are so many templates being generated?).
    """

    # NOTE: "mapsapce" in the attribute name below is a misspelling of "mapspace"; the
    # name is kept as-is so that existing references to it keep working.
    # The annotation is a variable-length tuple (``...``): the default is the empty
    # tuple and any number of the listed options may be given, which the previous
    # one-element ``tuple[Literal[...]]`` annotation did not describe.
    _count_option_for_mapsapce_size_evaluation: tuple[
        Literal[
            "redundant_loop_orders",
            "non_helpful_loops_for_loop_orders",
            "non_helpful_tile_shapes",
            "redundant_dataplacements",
        ],
        ...,
    ] = ()
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
from enum import auto, Flag
|
|
2
|
+
|
|
3
|
+
from functools import reduce
|
|
4
|
+
from operator import or_
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
class Metrics(Flag):
    """
    Bit flags naming the quantities a mapping can be optimized for or reported on.

    Being a :class:`enum.Flag`, members can be combined with ``|``.
    """

    LATENCY = auto()
    """ Latency. Minimize the amount of time taken to execute the workload. """

    ENERGY = auto()
    """ Energy. Minimize the amount of energy consumed by the workload. """

    RESOURCE_USAGE = auto()
    """
    Resource usage. Minimize the amount of resources used by the workload. This
    objective is multivariate, and must consider every resource available to the
    hardware.
    """

    ACTIONS = auto()
    """Action counts."""

    @classmethod
    def all_metrics(cls):
        """Return one flag value with every metric of this class set."""
        # Start from LATENCY and OR in each member in turn.
        combined = cls.LATENCY
        for metric in cls:
            combined = combined | metric
        return combined
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
from .mapping import *
|