accelforge-0.0.1-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- accelforge/__init__.py +21 -0
- accelforge/_accelerated_imports.py +16 -0
- accelforge/_deprecate/_simanneal/evalmapping.py +271 -0
- accelforge/_deprecate/_simanneal/mapspaceglobals.py +298 -0
- accelforge/_deprecate/_simanneal/simanneal.py +666 -0
- accelforge/_deprecate/_simanneal/tracking.py +105 -0
- accelforge/_deprecate/_simanneal/wrappers.py +218 -0
- accelforge/_deprecate/_simanneal2/__init__.py +7 -0
- accelforge/_deprecate/_simanneal2/simanneal.py +493 -0
- accelforge/_deprecate/_simanneal2/tracking.py +116 -0
- accelforge/_deprecate/compatibility_util.py +181 -0
- accelforge/_deprecate/layerdeduplication/__init__.py +2 -0
- accelforge/_deprecate/layerdeduplication/group_similar_einsums.py +160 -0
- accelforge/_deprecate/layerdeduplication/grouped_einsums.py +84 -0
- accelforge/_deprecate/mapping_filter_tags/__init__.py +2 -0
- accelforge/_deprecate/mapping_filter_tags/ffmt.py +212 -0
- accelforge/_deprecate/mapping_filter_tags/onesplit.py +24 -0
- accelforge/_deprecate/mapping_filter_tags/util.py +24 -0
- accelforge/_deprecate/tags.py +69 -0
- accelforge/_deprecate/viz/__init__.py +0 -0
- accelforge/_deprecate/viz/interactive.py +159 -0
- accelforge/_deprecate/viz/reservationtree.py +307 -0
- accelforge/_deprecate/viz/ski_slope.py +88 -0
- accelforge/_version.py +15 -0
- accelforge/examples.py +39 -0
- accelforge/frontend/__init__.py +10 -0
- accelforge/frontend/_binding.py +129 -0
- accelforge/frontend/_workload_isl/__init__.py +2 -0
- accelforge/frontend/_workload_isl/_isl.py +149 -0
- accelforge/frontend/_workload_isl/_symbolic.py +141 -0
- accelforge/frontend/arch copy.py +1544 -0
- accelforge/frontend/arch.py +1642 -0
- accelforge/frontend/config.py +63 -0
- accelforge/frontend/mapper/__init__.py +5 -0
- accelforge/frontend/mapper/ffm.py +126 -0
- accelforge/frontend/mapper/mapper.py +7 -0
- accelforge/frontend/mapper/metrics.py +30 -0
- accelforge/frontend/mapping/__init__.py +1 -0
- accelforge/frontend/mapping/mapping.py +1736 -0
- accelforge/frontend/model.py +14 -0
- accelforge/frontend/renames.py +150 -0
- accelforge/frontend/spec copy.py +230 -0
- accelforge/frontend/spec.py +301 -0
- accelforge/frontend/variables.py +12 -0
- accelforge/frontend/workload.py +952 -0
- accelforge/mapper/FFM/__init__.py +9 -0
- accelforge/mapper/FFM/_join_pmappings/__init__.py +0 -0
- accelforge/mapper/FFM/_join_pmappings/compatibility.py +653 -0
- accelforge/mapper/FFM/_join_pmappings/compress_pmappings.py +140 -0
- accelforge/mapper/FFM/_join_pmappings/join_pmappings.py +703 -0
- accelforge/mapper/FFM/_join_pmappings/pmapping_dataframe.py +901 -0
- accelforge/mapper/FFM/_join_pmappings/pmapping_group.py +337 -0
- accelforge/mapper/FFM/_make_pmappings/contraints/__init__.py +0 -0
- accelforge/mapper/FFM/_make_pmappings/contraints/constraints.py +360 -0
- accelforge/mapper/FFM/_make_pmappings/make_pmapping_templates/__init__.py +1 -0
- accelforge/mapper/FFM/_make_pmappings/make_pmapping_templates/make_loops.py +373 -0
- accelforge/mapper/FFM/_make_pmappings/make_pmapping_templates/make_pmapping_templates.py +463 -0
- accelforge/mapper/FFM/_make_pmappings/make_pmapping_templates/make_reservations.py +95 -0
- accelforge/mapper/FFM/_make_pmappings/make_pmapping_templates/make_storage_order.py +382 -0
- accelforge/mapper/FFM/_make_pmappings/make_pmapping_templates/make_storages.py +155 -0
- accelforge/mapper/FFM/_make_pmappings/make_pmappings.py +411 -0
- accelforge/mapper/FFM/_make_pmappings/make_pmappings_from_templates/__init__.py +1 -0
- accelforge/mapper/FFM/_make_pmappings/make_pmappings_from_templates/make_pmappings_from_templates.py +407 -0
- accelforge/mapper/FFM/_make_pmappings/make_pmappings_from_templates/make_tile_shapes.py +1681 -0
- accelforge/mapper/FFM/_make_pmappings/make_pmappings_from_templates/run_model.py +170 -0
- accelforge/mapper/FFM/_make_pmappings/make_pmappings_from_templates/symbol_relations.py +174 -0
- accelforge/mapper/FFM/_make_pmappings/pmapper_job.py +282 -0
- accelforge/mapper/FFM/_pareto_df/df_convention.py +273 -0
- accelforge/mapper/FFM/_pareto_df/pareto copy.py +836 -0
- accelforge/mapper/FFM/_pareto_df/pareto.py +508 -0
- accelforge/mapper/FFM/data.py +61 -0
- accelforge/mapper/FFM/main copy.py +236 -0
- accelforge/mapper/FFM/main.py +208 -0
- accelforge/mapper/FFM/mappings.py +510 -0
- accelforge/mapper/FFM/pmappings.py +310 -0
- accelforge/mapper/__init__.py +4 -0
- accelforge/mapper.py +0 -0
- accelforge/model/__init__.py +1 -0
- accelforge/model/_looptree/__init__.py +0 -0
- accelforge/model/_looptree/accesses.py +335 -0
- accelforge/model/_looptree/capacity/__init__.py +1 -0
- accelforge/model/_looptree/capacity/aggregators.py +36 -0
- accelforge/model/_looptree/capacity/capacity.py +47 -0
- accelforge/model/_looptree/energy.py +150 -0
- accelforge/model/_looptree/equivalent_ranks.py +29 -0
- accelforge/model/_looptree/latency/__init__.py +1 -0
- accelforge/model/_looptree/latency/latency.py +98 -0
- accelforge/model/_looptree/latency/memory.py +120 -0
- accelforge/model/_looptree/latency/processors.py +92 -0
- accelforge/model/_looptree/mapping_utilities.py +71 -0
- accelforge/model/_looptree/reuse/__init__.py +4 -0
- accelforge/model/_looptree/reuse/isl/__init__.py +1 -0
- accelforge/model/_looptree/reuse/isl/des.py +59 -0
- accelforge/model/_looptree/reuse/isl/isl_functions.py +374 -0
- accelforge/model/_looptree/reuse/isl/mapping_to_isl/__init__.py +4 -0
- accelforge/model/_looptree/reuse/isl/mapping_to_isl/analyze_mapping.py +297 -0
- accelforge/model/_looptree/reuse/isl/mapping_to_isl/skews_from_mapping.py +236 -0
- accelforge/model/_looptree/reuse/isl/mapping_to_isl/tiling.py +685 -0
- accelforge/model/_looptree/reuse/isl/mapping_to_isl/types.py +188 -0
- accelforge/model/_looptree/reuse/isl/spatial.py +260 -0
- accelforge/model/_looptree/reuse/isl/temporal.py +182 -0
- accelforge/model/_looptree/reuse/symbolic/__init__.py +1 -0
- accelforge/model/_looptree/reuse/symbolic/symbolic copy 2.py +1346 -0
- accelforge/model/_looptree/reuse/symbolic/symbolic copy.py +1408 -0
- accelforge/model/_looptree/reuse/symbolic/symbolic.py +1396 -0
- accelforge/model/_looptree/run.py +122 -0
- accelforge/model/_looptree/types.py +26 -0
- accelforge/model/_looptree/visualization/__init__.py +0 -0
- accelforge/model/_looptree/visualization/occupancy.py +11 -0
- accelforge/model/main.py +222 -0
- accelforge/plotting/__init__.py +2 -0
- accelforge/plotting/mappings.py +219 -0
- accelforge/plotting/specs.py +57 -0
- accelforge/util/__init__.py +4 -0
- accelforge/util/_base_analysis_types.py +24 -0
- accelforge/util/_basetypes.py +1089 -0
- accelforge/util/_frozenset.py +36 -0
- accelforge/util/_isl.py +29 -0
- accelforge/util/_itertools.py +14 -0
- accelforge/util/_mathfuncs.py +57 -0
- accelforge/util/_parse_expressions.py +339 -0
- accelforge/util/_picklecache.py +32 -0
- accelforge/util/_setexpressions.py +268 -0
- accelforge/util/_sympy/__init__.py +0 -0
- accelforge/util/_sympy/broadcast_max.py +18 -0
- accelforge/util/_visualization.py +112 -0
- accelforge/util/_yaml.py +579 -0
- accelforge/util/parallel.py +193 -0
- accelforge-0.0.1.dist-info/METADATA +64 -0
- accelforge-0.0.1.dist-info/RECORD +258 -0
- accelforge-0.0.1.dist-info/WHEEL +5 -0
- accelforge-0.0.1.dist-info/licenses/LICENSE +19 -0
- accelforge-0.0.1.dist-info/top_level.txt +5 -0
- docs/_build/html/_sources/fastfusion.frontend.mapper.rst.txt +37 -0
- docs/_build/html/_sources/fastfusion.frontend.rst.txt +70 -0
- docs/_build/html/_sources/fastfusion.frontend.workload.rst.txt +21 -0
- docs/_build/html/_sources/fastfusion.mapper.FFM.rst.txt +37 -0
- docs/_build/html/_sources/fastfusion.mapper.rst.txt +18 -0
- docs/_build/html/_sources/fastfusion.rst.txt +20 -0
- docs/_build/html/_sources/fastfusion.util.rst.txt +21 -0
- docs/_build/html/_sources/index.rst.txt +87 -0
- docs/_build/html/_sources/modules.rst.txt +7 -0
- docs/_build/html/_sources/notes/citation.rst.txt +45 -0
- docs/_build/html/_sources/notes/definitions.rst.txt +43 -0
- docs/_build/html/_sources/notes/faqs.rst.txt +39 -0
- docs/_build/html/_sources/notes/modeling/accelerator_energy_latency.rst.txt +72 -0
- docs/_build/html/_sources/notes/modeling/component_energy_area.rst.txt +96 -0
- docs/_build/html/_sources/notes/modeling/mapping.rst.txt +100 -0
- docs/_build/html/_sources/notes/modeling.rst.txt +33 -0
- docs/_build/html/_sources/notes/parsing/arithmetic_parsing.rst.txt +136 -0
- docs/_build/html/_sources/notes/parsing/setexpressions.rst.txt +63 -0
- docs/_build/html/_sources/notes/parsing/yaml_parsing.rst.txt +176 -0
- docs/_build/html/_sources/notes/quickstart_and_installation.rst.txt +9 -0
- docs/_build/html/_sources/notes/spec/architecture.rst.txt +133 -0
- docs/_build/html/_sources/notes/spec/mapping.rst.txt +12 -0
- docs/_build/html/_sources/notes/spec/workload.rst.txt +83 -0
- docs/_build/html/_sources/notes/spec.rst.txt +36 -0
- docs/source/_ext/include_attrs.py +213 -0
- docs/source/_ext/include_docstring.py +364 -0
- docs/source/_ext/include_functions.py +154 -0
- docs/source/_ext/include_notebook.py +131 -0
- docs/source/_ext/include_yaml.py +119 -0
- docs/source/_ext/inherited_attributes.py +222 -0
- docs/source/_ext/paths.py +4 -0
- docs/source/conf.py +79 -0
- examples/arches/compute_in_memory/_include.yaml +74 -0
- examples/arches/compute_in_memory/_include_functions.py +229 -0
- examples/arches/compute_in_memory/_load_spec.py +57 -0
- examples/arches/compute_in_memory/components/c2c_multiplier.py +181 -0
- examples/arches/compute_in_memory/components/dac_c2c_r2r.py +605 -0
- examples/arches/compute_in_memory/components/misc.py +195 -0
- examples/arches/compute_in_memory/components/util/bit_functions.py +51 -0
- examples/arches/compute_in_memory/components/zero_comparator.py +92 -0
- examples/arches/compute_in_memory/isaac.yaml +233 -0
- examples/arches/compute_in_memory/memory_cells/ecram_demo.yaml +63 -0
- examples/arches/compute_in_memory/memory_cells/rram_example.yaml +63 -0
- examples/arches/compute_in_memory/memory_cells/rram_isaac_isca_2016.yaml +64 -0
- examples/arches/compute_in_memory/memory_cells/rram_neurosim_default.yaml +63 -0
- examples/arches/compute_in_memory/memory_cells/rram_raella_isca_2023.yaml +70 -0
- examples/arches/compute_in_memory/memory_cells/rram_wan_nature_2022.yaml +63 -0
- examples/arches/compute_in_memory/memory_cells/sram_colonnade_jssc_2021.yaml +63 -0
- examples/arches/compute_in_memory/memory_cells/sram_example.yaml +63 -0
- examples/arches/compute_in_memory/memory_cells/sram_jia_jssc_2020.yaml +63 -0
- examples/arches/compute_in_memory/memory_cells/sram_sinangil_jssc_2021.yaml +63 -0
- examples/arches/compute_in_memory/memory_cells/sram_wang_vlsi_2022.yaml +63 -0
- examples/arches/compute_in_memory/wang_vlsi_2022.yaml +289 -0
- examples/arches/eyeriss.yaml +68 -0
- examples/arches/fanout_variations/at_glb.yaml +31 -0
- examples/arches/fanout_variations/at_glb_with_fanout_node.yaml +34 -0
- examples/arches/fanout_variations/at_mac.yaml +31 -0
- examples/arches/fanout_variations/at_mac_with_constraints.yaml +38 -0
- examples/arches/fanout_variations/at_mac_with_fanout_node.yaml +34 -0
- examples/arches/nvdla.yaml +47 -0
- examples/arches/simple.yaml +28 -0
- examples/arches/tpu_v4i.yaml +67 -0
- examples/mappings/unfused_matmuls_to_simple.yaml +33 -0
- examples/misc/component_annotated.yaml +33 -0
- examples/workloads/gpt3_6.7B.yaml +124 -0
- examples/workloads/matmuls.yaml +20 -0
- examples/workloads/mobilenet_28.yaml +81 -0
- examples/workloads/mobilenet_various_separate.yaml +106 -0
- examples/workloads/three_matmuls_annotated.yaml +59 -0
- notebooks/.ipynb_checkpoints/fastfusion_arch_study_michael-checkpoint.ipynb +359 -0
- notebooks/compute_in_memory/_scripts.py +339 -0
- notebooks/compute_in_memory/isaac.guide.ipynb +270 -0
- notebooks/compute_in_memory/wang_vlsi_2022.ipynb +602 -0
- notebooks/paths.py +4 -0
- notebooks/tutorials/.ipynb_checkpoints/1_FFM-checkpoint.ipynb +3110 -0
- notebooks/tutorials/FFM.ipynb +3498 -0
- notebooks/tutorials/_include.py +48 -0
- notebooks/tutorials/component_energy_area.ipynb +363 -0
- tests/Q_mapping.yaml +38 -0
- tests/__init__.py +0 -0
- tests/conv.mapping.yaml +27 -0
- tests/conv.workload.yaml +13 -0
- tests/conv_sym.mapping.yaml +43 -0
- tests/copy.mapping.yaml +35 -0
- tests/copy.workload.yaml +15 -0
- tests/distribuffers/__init__.py +0 -0
- tests/distribuffers/multicast/test_cases.yaml +482 -0
- tests/distribuffers/spec/binding/valid_bindings.yaml +97 -0
- tests/distribuffers/spec/distributed.yaml +100 -0
- tests/distribuffers/spec/logical_arch.yaml +32 -0
- tests/distribuffers/spec/physical_arch.yaml +69 -0
- tests/distribuffers/test_binding.py +48 -0
- tests/frontend/__init__.py +0 -0
- tests/frontend/test_mapping_viz.py +52 -0
- tests/mapper/__init__.py +0 -0
- tests/mapper/configs/conv1d/conv1d.mapping.yaml +31 -0
- tests/mapper/configs/conv1d/conv1d.workload.yaml +11 -0
- tests/mapper/configs/two_conv1d/two_conv1d.expected.yaml +38 -0
- tests/mapper/configs/two_conv1d/two_conv1d.mapping.yaml +54 -0
- tests/mapper/configs/two_conv1d/two_conv1d.workload.yaml +19 -0
- tests/mapper/test_mapping_to_isl.py +90 -0
- tests/mapper/test_spatial_reuse_analysis.py +67 -0
- tests/mapper/test_temporal_reuse_analysis.py +56 -0
- tests/mapper/util.py +58 -0
- tests/matmul.mapping.yaml +29 -0
- tests/matmul.workload.yaml +12 -0
- tests/matmul_spatial.mapping.yaml +44 -0
- tests/mha.renames.yaml +65 -0
- tests/mha.workload.yaml +67 -0
- tests/mha.yaml +59 -0
- tests/mha_full.workload.yaml +67 -0
- tests/mobilenet.workload.yaml +35 -0
- tests/mobilenet_long.workload.yaml +64 -0
- tests/pmappingcache.py +24 -0
- tests/processing_stage.arch.yaml +40 -0
- tests/snowcat.arch.yaml +36 -0
- tests/test_ffm_join_pmappings.py +106 -0
- tests/test_ffm_make_pmappings.py +82 -0
- tests/test_ffm_make_tile_shapes.py +49 -0
- tests/test_mapper.py +100 -0
- tests/test_model.py +37 -0
- tests/test_plotting.py +72 -0
- tests/test_processing_stage.py +46 -0
- tests/test_symbolic_model.py +248 -0
- tests/test_workload.py +141 -0
accelforge/mapper/FFM/main copy.py
@@ -0,0 +1,236 @@
+from copy import deepcopy
+import inspect
+import os
+from typing import Callable
+import joblib
+import logging
+
+from accelforge import arch
+from accelforge import Spec
+from accelforge.mapper.FFM.pmappings import MultiEinsumPmappings
+from accelforge.mapper.FFM.mappings import Mappings
+import accelforge.mapper.FFM._make_pmappings.make_pmappings as pmapper
+from accelforge.frontend.workload import EinsumName
+from accelforge.mapper.FFM._join_pmappings.join_pmappings import (
+    clean_compress_and_join_pmappings,
+)
+from accelforge._accelerated_imports import pd
+
+
+logger = logging.getLogger(__name__)
+
+
+def map_workload_to_arch(
+    spec: Spec,
+    einsum_names: list[EinsumName] | None = None,
+    can_combine_multiple_runs: bool = False,
+    cache_dir: str | None = None,
+    print_number_of_pmappings: bool = True,
+    _pmapping_row_filter_function: Callable[[pd.Series], bool] | None = None,
+) -> Mappings:
+    """
+    Maps a workload to an architecture using the AccelForge FFM mapper.
+
+    Parameters
+    ----------
+    spec:
+        The Spec to map.
+    einsum_names:
+        The einsum names to map. If None, all einsums will be mapped.
+    can_combine_multiple_runs:
+        Whether the results of multiple make_pmappings runs may later be
+        combined. Setting this to True allows things like
+        `pmappings = make_pmappings(*args_a) | make_pmappings(*args_b)`, but
+        slows down execution.
+    cache_dir:
+        The directory to cache pmappings in. If None, no caching will be done.
+    print_number_of_pmappings:
+        Whether to print the number of pmappings for each einsum.
+    _pmapping_row_filter_function:
+        A function that takes in a row of the pmapping dataframe and returns
+        True if the row should be included in the final mappings, and False
+        otherwise. If None, all rows will be included.
+
+    Returns
+    -------
+    A Mappings object containing the evaluated mappings.
+    """
+    from accelforge.model.main import evaluate_mapping
+
+    pmappings = make_pmappings(
+        spec,
+        einsum_names=einsum_names,
+        can_combine_multiple_runs=can_combine_multiple_runs,
+        cache_dir=cache_dir,
+        print_number_of_pmappings=print_number_of_pmappings,
+    )
+    mappings = join_pmappings(
+        spec,
+        pmappings,
+        require_all_einsums=einsum_names is not None,
+        _pmapping_row_filter_function=_pmapping_row_filter_function,
+    )
+
+    new_mapping_data = []
+    for i in range(len(mappings.data)):
+        local_spec = deepcopy(spec)
+        local_spec.model.metrics = local_spec.mapper.ffm.info_metrics
+        local_spec.mapping = mappings.data.iloc[i]["Total<SEP>mapping"]()
+        # BUG: Mapping._from_pmappings creates mappings that cannot be evaluated!
+        this_mapping = evaluate_mapping(local_spec)
+        new_mapping_data.append(this_mapping.data)
+
+    mappings.data = pd.concat(new_mapping_data)
+
+    return mappings
+
+
+def make_pmappings(
+    spec: Spec,
+    einsum_names: list[EinsumName] | None = None,
+    can_combine_multiple_runs: bool = False,
+    cache_dir: str | None = None,
+    print_number_of_pmappings: bool = True,
+) -> MultiEinsumPmappings:
+    """
+    Creates pmappings for a spec. Pmappings must be joined together using
+    `join_pmappings` to create a full mapping.
+
+    Parameters
+    ----------
+    spec:
+        The Spec to generate pmappings for.
+    einsum_names:
+        The einsum names to generate pmappings for. If None, all einsums will be
+        included.
+    can_combine_multiple_runs:
+        Whether the results of multiple make_pmappings runs may later be
+        combined. Setting this to True allows things like
+        `pmappings = make_pmappings(*args_a) | make_pmappings(*args_b)`, but
+        slows down execution.
+    cache_dir:
+        The directory to cache pmappings in. If None, no caching will be done.
+    print_number_of_pmappings:
+        Whether to print the number of pmappings for each einsum.
+
+    Returns
+    -------
+    A MultiEinsumPmappings object.
+    """
+    kwargs = dict(
+        spec=spec,
+        einsum_names=einsum_names,
+        can_combine_multiple_runs=can_combine_multiple_runs,
+    )
+    assert len(kwargs) == len(inspect.signature(_make_pmappings).parameters)
+
+    if cache_dir is None:
+        result = _make_pmappings(**kwargs)
+    else:
+
+        @joblib.Memory(location=os.path.join(cache_dir), compress=True).cache
+        def _make_pmappings_cached(**kwargs) -> MultiEinsumPmappings:
+            return _make_pmappings(**kwargs)
+
+        result = _make_pmappings_cached(**kwargs)
+
+    if print_number_of_pmappings:
+        print(result.n_pmapping_string())
+
+    return result
+
+
+def join_pmappings(
+    spec: Spec,
+    pmappings: MultiEinsumPmappings,
+    require_all_einsums: bool = True,
+    _pmapping_row_filter_function: Callable[[pd.Series], bool] | None = None,
+) -> Mappings:
+    """
+    Joins pmappings into full mappings for the entire workload. Pmappings can
+    be generated using `make_pmappings`.
+
+    Parameters
+    ----------
+    spec:
+        The complete specification for the workload and architecture.
+    pmappings:
+        The pmappings to join.
+    require_all_einsums:
+        If True, all einsums in the workload must have pmappings. If False, only
+        einsums that have pmappings will be included in the final mappings.
+    _pmapping_row_filter_function:
+        A function that takes in a row of the pmapping dataframe and returns
+        True if the row should be included in the final mappings, and False
+        otherwise. If None, all rows will be included.
+
+    Returns
+    -------
+    A Mappings object containing all valid, optimal mappings for the workload.
+    """
+    return clean_compress_and_join_pmappings(
+        spec, pmappings, require_all_einsums, _pmapping_row_filter_function
+    )
+
+
+def _make_pmappings(
+    spec: Spec,
+    einsum_names: list[EinsumName] | None = None,
+    can_combine_multiple_runs: bool = False,
+) -> MultiEinsumPmappings:
+    if einsum_names is None:
+        einsum_names = [e.name for e in spec.workload.einsums]
+
+    einsum2symbol_table = spec.workload.get_constraint_symbol_table(
+        einsum_names, spec.renames
+    )
+    tensor2bits_per_value = spec.workload._get_bits_per_value(einsum2symbol_table)
+    for einsum_name, symbol_table in einsum2symbol_table.items():
+        for tensor in spec.workload.einsums[einsum_name].tensor_names:
+            if tensor not in tensor2bits_per_value:
+                raise ValueError(
+                    f"Tensor {tensor} not found in bits per value for Einsum {einsum_name}. "
+                    f"Bits per value:\n\t"
+                    + "\n\t".join(f"{k}: {v}" for k, v in tensor2bits_per_value.items())
+                )
+            if tensor not in symbol_table:
+                raise ValueError(
+                    f"Tensor {tensor} not found in symbol table for Einsum {einsum_name}. "
+                    f"Symbol table:\n\t"
+                    + "\n\t".join(f"{k}: {v}" for k, v in symbol_table.items())
+                )
+            symbol_table[tensor].bits_per_value = tensor2bits_per_value[tensor]
+
+    pmapping_groups, pmapping_objects, einsum2jobs = pmapper.make_pmappings(
+        spec,
+        metrics=spec.mapper.ffm.metrics,
+        einsum_names=einsum_names,
+        can_combine_multiple_runs=can_combine_multiple_runs,
+        einsum2symbol_table=einsum2symbol_table,
+        tensor2bits_per_value=tensor2bits_per_value,
+    )
+
+    resource2capacity = {}
+    who_set = {}
+    jobs_flattened = {(e, j) for e, jobs in einsum2jobs.items() for j in jobs}
+    for einsum, job in jobs_flattened:
+        memories = [m for m in job.flattened_arch if isinstance(m, arch.Memory)]
+        for m in memories:
+            resource2capacity.setdefault(m.name, m.attributes.size)
+            who_set.setdefault(m.name, einsum)
+            if resource2capacity[m.name] != m.attributes.size:
+                raise ValueError(
+                    f"Memory {m.name} has different sizes depending on which Einsum "
+                    f"is being mapped, but memory sizes must not depend on the "
+                    f"Einsum. Einsum {who_set[m.name]} set the size to "
+                    f"{resource2capacity[m.name]}, but Einsum {einsum} set the size to "
+                    f"{m.attributes.size}."
+                )
+            resource2capacity[m.name] = m.attributes.size
+
+    m = MultiEinsumPmappings(
+        pmapping_groups,
+        pmapping_objects,
+        resource2capacity,
+        einsum2jobs,
+        can_combine_multiple_runs=can_combine_multiple_runs,
+        einsums_with_pmappings_generated=set(
+            einsum_names if einsum_names else spec.workload.einsum_names
+        ),
+    )
+
+    return m
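For orientation, here is a minimal usage sketch of the API defined above. This is a hypothetical driver script, not a file in this wheel: how a Spec is constructed is not shown in this diff, the einsum names "Q" and "K" are illustrative, and the "Total<SEP>energy" column name is an assumption used only to demonstrate the row-filter hook.

from accelforge import Spec
from accelforge.mapper.FFM.main import (
    join_pmappings,
    make_pmappings,
    map_workload_to_arch,
)

spec: Spec = ...  # a fully populated Spec; construction is not part of this diff

# One-shot flow: generate pmappings for every einsum, join them, and evaluate,
# caching intermediate pmappings on disk.
mappings = map_workload_to_arch(spec, cache_dir=".pmapping_cache")

# Two-step flow, combining two separate runs with `|`. Per the docstring, this
# requires can_combine_multiple_runs=True and slows down execution.
pmappings = make_pmappings(
    spec, einsum_names=["Q"], can_combine_multiple_runs=True
) | make_pmappings(
    spec, einsum_names=["K"], can_combine_multiple_runs=True
)
mappings = join_pmappings(spec, pmappings, require_all_einsums=False)

# Filtering pmapping rows before the join ("Total<SEP>energy" is a
# hypothetical column name used only for illustration).
mappings = map_workload_to_arch(
    spec,
    _pmapping_row_filter_function=lambda row: row["Total<SEP>energy"] < 1e9,
)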
accelforge/mapper/FFM/main.py
@@ -0,0 +1,208 @@
+from copy import deepcopy
+import inspect
+import os
+from typing import Callable
+import joblib
+import logging
+
+from accelforge import arch
+from accelforge import Spec
+from accelforge.mapper.FFM.pmappings import MultiEinsumPmappings
+from accelforge.mapper.FFM.mappings import Mappings
+import accelforge.mapper.FFM._make_pmappings.make_pmappings as pmapper
+from accelforge.frontend.workload import EinsumName
+from accelforge.mapper.FFM._join_pmappings.join_pmappings import (
+    clean_compress_and_join_pmappings,
+)
+from accelforge._accelerated_imports import pd
+
+
+logger = logging.getLogger(__name__)
+
+
+def map_workload_to_arch(
+    spec: Spec,
+    einsum_names: list[EinsumName] | None = None,
+    can_combine_multiple_runs: bool = False,
+    cache_dir: str | None = None,
+    print_number_of_pmappings: bool = True,
+    _pmapping_row_filter_function: Callable[[pd.Series], bool] | None = None,
+) -> Mappings:
+    """
+    Maps a workload to an architecture using the AccelForge FFM mapper.
+
+    Parameters
+    ----------
+    spec:
+        The Spec to map.
+    einsum_names:
+        The einsum names to map. If None, all einsums will be mapped.
+    can_combine_multiple_runs:
+        Whether the results of multiple make_pmappings runs may later be
+        combined. Setting this to True allows things like
+        `pmappings = make_pmappings(*args_a) | make_pmappings(*args_b)`, but
+        slows down execution.
+    cache_dir:
+        The directory to cache pmappings in. If None, no caching will be done.
+    print_number_of_pmappings:
+        Whether to print the number of pmappings for each einsum.
+    _pmapping_row_filter_function:
+        A function that takes in a row of the pmapping dataframe and returns
+        True if the row should be included in the final mappings, and False
+        otherwise. If None, all rows will be included.
+
+    Returns
+    -------
+    A Mappings object containing the evaluated mappings.
+    """
+    from accelforge.model.main import evaluate_mapping
+
+    pmappings = make_pmappings(
+        spec,
+        einsum_names=einsum_names,
+        can_combine_multiple_runs=can_combine_multiple_runs,
+        cache_dir=cache_dir,
+        print_number_of_pmappings=print_number_of_pmappings,
+    )
+    mappings = join_pmappings(
+        spec,
+        pmappings,
+        require_all_einsums=einsum_names is not None,
+        _pmapping_row_filter_function=_pmapping_row_filter_function,
+    )
+
+    new_mapping_data = []
+    for i in range(len(mappings.data)):
+        local_spec = deepcopy(spec)
+        local_spec.model.metrics = local_spec.mapper.ffm.info_metrics
+        local_spec.mapping = mappings.data.iloc[i]["Total<SEP>mapping"]()
+        # BUG: Mapping._from_pmappings creates mappings that cannot be evaluated!
+        this_mapping = evaluate_mapping(
+            local_spec,
+            flattened_arches=mappings.flattened_arches,
+            parsed_specs=mappings.parsed_specs,
+        )
+        new_mapping_data.append(this_mapping.data)
+
+    mappings.data = pd.concat(new_mapping_data)
+
+    return mappings
+
+
+def make_pmappings(
+    spec: Spec,
+    einsum_names: list[EinsumName] | None = None,
+    can_combine_multiple_runs: bool = False,
+    cache_dir: str | None = None,
+    print_number_of_pmappings: bool = True,
+) -> MultiEinsumPmappings:
+    """
+    Creates pmappings for a spec. Pmappings must be joined together using
+    `join_pmappings` to create a full mapping.
+
+    Parameters
+    ----------
+    spec:
+        The Spec to generate pmappings for.
+    einsum_names:
+        The einsum names to generate pmappings for. If None, all einsums will be
+        included.
+    can_combine_multiple_runs:
+        Whether the results of multiple make_pmappings runs may later be
+        combined. Setting this to True allows things like
+        `pmappings = make_pmappings(*args_a) | make_pmappings(*args_b)`, but
+        slows down execution.
+    cache_dir:
+        The directory to cache pmappings in. If None, no caching will be done.
+    print_number_of_pmappings:
+        Whether to print the number of pmappings for each einsum.
+
+    Returns
+    -------
+    A MultiEinsumPmappings object.
+    """
+    kwargs = dict(
+        spec=spec,
+        einsum_names=einsum_names,
+        can_combine_multiple_runs=can_combine_multiple_runs,
+    )
+    assert len(kwargs) == len(inspect.signature(_make_pmappings).parameters)
+
+    if cache_dir is None:
+        result = _make_pmappings(**kwargs)
+    else:
+
+        @joblib.Memory(location=os.path.join(cache_dir), compress=True).cache
+        def _make_pmappings_cached(**kwargs) -> MultiEinsumPmappings:
+            return _make_pmappings(**kwargs)
+
+        result = _make_pmappings_cached(**kwargs)
+
+    if print_number_of_pmappings:
+        print(result.n_pmapping_string())
+
+    return result
+
+
+def join_pmappings(
+    spec: Spec,
+    pmappings: MultiEinsumPmappings,
+    require_all_einsums: bool = True,
+    _pmapping_row_filter_function: Callable[[pd.Series], bool] | None = None,
+) -> Mappings:
+    """
+    Joins pmappings into full mappings for the entire workload. Pmappings can
+    be generated using `make_pmappings`.
+
+    Parameters
+    ----------
+    spec:
+        The complete specification for the workload and architecture.
+    pmappings:
+        The pmappings to join.
+    require_all_einsums:
+        If True, all einsums in the workload must have pmappings. If False, only
+        einsums that have pmappings will be included in the final mappings.
+    _pmapping_row_filter_function:
+        A function that takes in a row of the pmapping dataframe and returns
+        True if the row should be included in the final mappings, and False
+        otherwise. If None, all rows will be included.
+
+    Returns
+    -------
+    A Mappings object containing all valid, optimal mappings for the workload.
+    """
+    return clean_compress_and_join_pmappings(
+        spec, pmappings, require_all_einsums, _pmapping_row_filter_function
+    )
+
+
+def _make_pmappings(
+    spec: Spec,
+    einsum_names: list[EinsumName] | None = None,
+    can_combine_multiple_runs: bool = False,
+) -> MultiEinsumPmappings:
+    if einsum_names is None:
+        einsum_names = [e.name for e in spec.workload.einsums]
+
+    pmapping_groups, pmapping_objects, einsum2jobs = pmapper.make_pmappings(
+        spec,
+        metrics=spec.mapper.ffm.metrics,
+        einsum_names=einsum_names,
+        can_combine_multiple_runs=can_combine_multiple_runs,
+    )
+
+    flattened_arches = {}
+    parsed_specs = {}
+    for einsum_name, jobs in einsum2jobs.items():
+        for job in jobs:
+            compute_name = job.flattened_arch[-1].name
+            flattened_arches[(einsum_name, compute_name)] = job.flattened_arch
+            parsed_specs[einsum_name] = job.spec
+
+    m = MultiEinsumPmappings(
+        pmapping_groups,
+        pmapping_objects,
+        einsum2jobs,
+        can_combine_multiple_runs=can_combine_multiple_runs,
+        einsums_with_pmappings_generated=set(
+            einsum_names if einsum_names else spec.workload.einsum_names
+        ),
+        flattened_arches=flattened_arches,
+        parsed_specs=parsed_specs,
+    )
+
+    return m
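The `cache_dir` branch in `make_pmappings` above relies on `joblib.Memory`'s function-level disk cache: joblib hashes the arguments, stores the pickled result under the cache directory, and serves repeat calls from disk. A self-contained sketch of the same pattern applied to a toy function (independent of accelforge; all names here are illustrative):

import os
import joblib


def expensive(x: int) -> int:
    print(f"computing {x}")  # printed only on a cache miss
    return x * x


def cached_expensive(x: int, cache_dir: str | None = None) -> int:
    # Mirrors make_pmappings: bypass the cache entirely when cache_dir is None.
    if cache_dir is None:
        return expensive(x)

    # joblib.Memory keys on the wrapped function and its arguments, so
    # redefining the wrapper on each call still hits the same on-disk entries.
    @joblib.Memory(location=os.path.join(cache_dir), compress=True).cache
    def _cached(x: int) -> int:
        return expensive(x)

    return _cached(x)


if __name__ == "__main__":
    print(cached_expensive(4, cache_dir="/tmp/demo_cache"))  # computes 16
    print(cached_expensive(4, cache_dir="/tmp/demo_cache"))  # served from disk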