accelforge 0.0.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- accelforge/__init__.py +21 -0
- accelforge/_accelerated_imports.py +16 -0
- accelforge/_deprecate/_simanneal/evalmapping.py +271 -0
- accelforge/_deprecate/_simanneal/mapspaceglobals.py +298 -0
- accelforge/_deprecate/_simanneal/simanneal.py +666 -0
- accelforge/_deprecate/_simanneal/tracking.py +105 -0
- accelforge/_deprecate/_simanneal/wrappers.py +218 -0
- accelforge/_deprecate/_simanneal2/__init__.py +7 -0
- accelforge/_deprecate/_simanneal2/simanneal.py +493 -0
- accelforge/_deprecate/_simanneal2/tracking.py +116 -0
- accelforge/_deprecate/compatibility_util.py +181 -0
- accelforge/_deprecate/layerdeduplication/__init__.py +2 -0
- accelforge/_deprecate/layerdeduplication/group_similar_einsums.py +160 -0
- accelforge/_deprecate/layerdeduplication/grouped_einsums.py +84 -0
- accelforge/_deprecate/mapping_filter_tags/__init__.py +2 -0
- accelforge/_deprecate/mapping_filter_tags/ffmt.py +212 -0
- accelforge/_deprecate/mapping_filter_tags/onesplit.py +24 -0
- accelforge/_deprecate/mapping_filter_tags/util.py +24 -0
- accelforge/_deprecate/tags.py +69 -0
- accelforge/_deprecate/viz/__init__.py +0 -0
- accelforge/_deprecate/viz/interactive.py +159 -0
- accelforge/_deprecate/viz/reservationtree.py +307 -0
- accelforge/_deprecate/viz/ski_slope.py +88 -0
- accelforge/_version.py +15 -0
- accelforge/examples.py +39 -0
- accelforge/frontend/__init__.py +10 -0
- accelforge/frontend/_binding.py +129 -0
- accelforge/frontend/_workload_isl/__init__.py +2 -0
- accelforge/frontend/_workload_isl/_isl.py +149 -0
- accelforge/frontend/_workload_isl/_symbolic.py +141 -0
- accelforge/frontend/arch copy.py +1544 -0
- accelforge/frontend/arch.py +1642 -0
- accelforge/frontend/config.py +63 -0
- accelforge/frontend/mapper/__init__.py +5 -0
- accelforge/frontend/mapper/ffm.py +126 -0
- accelforge/frontend/mapper/mapper.py +7 -0
- accelforge/frontend/mapper/metrics.py +30 -0
- accelforge/frontend/mapping/__init__.py +1 -0
- accelforge/frontend/mapping/mapping.py +1736 -0
- accelforge/frontend/model.py +14 -0
- accelforge/frontend/renames.py +150 -0
- accelforge/frontend/spec copy.py +230 -0
- accelforge/frontend/spec.py +301 -0
- accelforge/frontend/variables.py +12 -0
- accelforge/frontend/workload.py +952 -0
- accelforge/mapper/FFM/__init__.py +9 -0
- accelforge/mapper/FFM/_join_pmappings/__init__.py +0 -0
- accelforge/mapper/FFM/_join_pmappings/compatibility.py +653 -0
- accelforge/mapper/FFM/_join_pmappings/compress_pmappings.py +140 -0
- accelforge/mapper/FFM/_join_pmappings/join_pmappings.py +703 -0
- accelforge/mapper/FFM/_join_pmappings/pmapping_dataframe.py +901 -0
- accelforge/mapper/FFM/_join_pmappings/pmapping_group.py +337 -0
- accelforge/mapper/FFM/_make_pmappings/contraints/__init__.py +0 -0
- accelforge/mapper/FFM/_make_pmappings/contraints/constraints.py +360 -0
- accelforge/mapper/FFM/_make_pmappings/make_pmapping_templates/__init__.py +1 -0
- accelforge/mapper/FFM/_make_pmappings/make_pmapping_templates/make_loops.py +373 -0
- accelforge/mapper/FFM/_make_pmappings/make_pmapping_templates/make_pmapping_templates.py +463 -0
- accelforge/mapper/FFM/_make_pmappings/make_pmapping_templates/make_reservations.py +95 -0
- accelforge/mapper/FFM/_make_pmappings/make_pmapping_templates/make_storage_order.py +382 -0
- accelforge/mapper/FFM/_make_pmappings/make_pmapping_templates/make_storages.py +155 -0
- accelforge/mapper/FFM/_make_pmappings/make_pmappings.py +411 -0
- accelforge/mapper/FFM/_make_pmappings/make_pmappings_from_templates/__init__.py +1 -0
- accelforge/mapper/FFM/_make_pmappings/make_pmappings_from_templates/make_pmappings_from_templates.py +407 -0
- accelforge/mapper/FFM/_make_pmappings/make_pmappings_from_templates/make_tile_shapes.py +1681 -0
- accelforge/mapper/FFM/_make_pmappings/make_pmappings_from_templates/run_model.py +170 -0
- accelforge/mapper/FFM/_make_pmappings/make_pmappings_from_templates/symbol_relations.py +174 -0
- accelforge/mapper/FFM/_make_pmappings/pmapper_job.py +282 -0
- accelforge/mapper/FFM/_pareto_df/df_convention.py +273 -0
- accelforge/mapper/FFM/_pareto_df/pareto copy.py +836 -0
- accelforge/mapper/FFM/_pareto_df/pareto.py +508 -0
- accelforge/mapper/FFM/data.py +61 -0
- accelforge/mapper/FFM/main copy.py +236 -0
- accelforge/mapper/FFM/main.py +208 -0
- accelforge/mapper/FFM/mappings.py +510 -0
- accelforge/mapper/FFM/pmappings.py +310 -0
- accelforge/mapper/__init__.py +4 -0
- accelforge/mapper.py +0 -0
- accelforge/model/__init__.py +1 -0
- accelforge/model/_looptree/__init__.py +0 -0
- accelforge/model/_looptree/accesses.py +335 -0
- accelforge/model/_looptree/capacity/__init__.py +1 -0
- accelforge/model/_looptree/capacity/aggregators.py +36 -0
- accelforge/model/_looptree/capacity/capacity.py +47 -0
- accelforge/model/_looptree/energy.py +150 -0
- accelforge/model/_looptree/equivalent_ranks.py +29 -0
- accelforge/model/_looptree/latency/__init__.py +1 -0
- accelforge/model/_looptree/latency/latency.py +98 -0
- accelforge/model/_looptree/latency/memory.py +120 -0
- accelforge/model/_looptree/latency/processors.py +92 -0
- accelforge/model/_looptree/mapping_utilities.py +71 -0
- accelforge/model/_looptree/reuse/__init__.py +4 -0
- accelforge/model/_looptree/reuse/isl/__init__.py +1 -0
- accelforge/model/_looptree/reuse/isl/des.py +59 -0
- accelforge/model/_looptree/reuse/isl/isl_functions.py +374 -0
- accelforge/model/_looptree/reuse/isl/mapping_to_isl/__init__.py +4 -0
- accelforge/model/_looptree/reuse/isl/mapping_to_isl/analyze_mapping.py +297 -0
- accelforge/model/_looptree/reuse/isl/mapping_to_isl/skews_from_mapping.py +236 -0
- accelforge/model/_looptree/reuse/isl/mapping_to_isl/tiling.py +685 -0
- accelforge/model/_looptree/reuse/isl/mapping_to_isl/types.py +188 -0
- accelforge/model/_looptree/reuse/isl/spatial.py +260 -0
- accelforge/model/_looptree/reuse/isl/temporal.py +182 -0
- accelforge/model/_looptree/reuse/symbolic/__init__.py +1 -0
- accelforge/model/_looptree/reuse/symbolic/symbolic copy 2.py +1346 -0
- accelforge/model/_looptree/reuse/symbolic/symbolic copy.py +1408 -0
- accelforge/model/_looptree/reuse/symbolic/symbolic.py +1396 -0
- accelforge/model/_looptree/run.py +122 -0
- accelforge/model/_looptree/types.py +26 -0
- accelforge/model/_looptree/visualization/__init__.py +0 -0
- accelforge/model/_looptree/visualization/occupancy.py +11 -0
- accelforge/model/main.py +222 -0
- accelforge/plotting/__init__.py +2 -0
- accelforge/plotting/mappings.py +219 -0
- accelforge/plotting/specs.py +57 -0
- accelforge/util/__init__.py +4 -0
- accelforge/util/_base_analysis_types.py +24 -0
- accelforge/util/_basetypes.py +1089 -0
- accelforge/util/_frozenset.py +36 -0
- accelforge/util/_isl.py +29 -0
- accelforge/util/_itertools.py +14 -0
- accelforge/util/_mathfuncs.py +57 -0
- accelforge/util/_parse_expressions.py +339 -0
- accelforge/util/_picklecache.py +32 -0
- accelforge/util/_setexpressions.py +268 -0
- accelforge/util/_sympy/__init__.py +0 -0
- accelforge/util/_sympy/broadcast_max.py +18 -0
- accelforge/util/_visualization.py +112 -0
- accelforge/util/_yaml.py +579 -0
- accelforge/util/parallel.py +193 -0
- accelforge-0.0.1.dist-info/METADATA +64 -0
- accelforge-0.0.1.dist-info/RECORD +258 -0
- accelforge-0.0.1.dist-info/WHEEL +5 -0
- accelforge-0.0.1.dist-info/licenses/LICENSE +19 -0
- accelforge-0.0.1.dist-info/top_level.txt +5 -0
- docs/_build/html/_sources/fastfusion.frontend.mapper.rst.txt +37 -0
- docs/_build/html/_sources/fastfusion.frontend.rst.txt +70 -0
- docs/_build/html/_sources/fastfusion.frontend.workload.rst.txt +21 -0
- docs/_build/html/_sources/fastfusion.mapper.FFM.rst.txt +37 -0
- docs/_build/html/_sources/fastfusion.mapper.rst.txt +18 -0
- docs/_build/html/_sources/fastfusion.rst.txt +20 -0
- docs/_build/html/_sources/fastfusion.util.rst.txt +21 -0
- docs/_build/html/_sources/index.rst.txt +87 -0
- docs/_build/html/_sources/modules.rst.txt +7 -0
- docs/_build/html/_sources/notes/citation.rst.txt +45 -0
- docs/_build/html/_sources/notes/definitions.rst.txt +43 -0
- docs/_build/html/_sources/notes/faqs.rst.txt +39 -0
- docs/_build/html/_sources/notes/modeling/accelerator_energy_latency.rst.txt +72 -0
- docs/_build/html/_sources/notes/modeling/component_energy_area.rst.txt +96 -0
- docs/_build/html/_sources/notes/modeling/mapping.rst.txt +100 -0
- docs/_build/html/_sources/notes/modeling.rst.txt +33 -0
- docs/_build/html/_sources/notes/parsing/arithmetic_parsing.rst.txt +136 -0
- docs/_build/html/_sources/notes/parsing/setexpressions.rst.txt +63 -0
- docs/_build/html/_sources/notes/parsing/yaml_parsing.rst.txt +176 -0
- docs/_build/html/_sources/notes/quickstart_and_installation.rst.txt +9 -0
- docs/_build/html/_sources/notes/spec/architecture.rst.txt +133 -0
- docs/_build/html/_sources/notes/spec/mapping.rst.txt +12 -0
- docs/_build/html/_sources/notes/spec/workload.rst.txt +83 -0
- docs/_build/html/_sources/notes/spec.rst.txt +36 -0
- docs/source/_ext/include_attrs.py +213 -0
- docs/source/_ext/include_docstring.py +364 -0
- docs/source/_ext/include_functions.py +154 -0
- docs/source/_ext/include_notebook.py +131 -0
- docs/source/_ext/include_yaml.py +119 -0
- docs/source/_ext/inherited_attributes.py +222 -0
- docs/source/_ext/paths.py +4 -0
- docs/source/conf.py +79 -0
- examples/arches/compute_in_memory/_include.yaml +74 -0
- examples/arches/compute_in_memory/_include_functions.py +229 -0
- examples/arches/compute_in_memory/_load_spec.py +57 -0
- examples/arches/compute_in_memory/components/c2c_multiplier.py +181 -0
- examples/arches/compute_in_memory/components/dac_c2c_r2r.py +605 -0
- examples/arches/compute_in_memory/components/misc.py +195 -0
- examples/arches/compute_in_memory/components/util/bit_functions.py +51 -0
- examples/arches/compute_in_memory/components/zero_comparator.py +92 -0
- examples/arches/compute_in_memory/isaac.yaml +233 -0
- examples/arches/compute_in_memory/memory_cells/ecram_demo.yaml +63 -0
- examples/arches/compute_in_memory/memory_cells/rram_example.yaml +63 -0
- examples/arches/compute_in_memory/memory_cells/rram_isaac_isca_2016.yaml +64 -0
- examples/arches/compute_in_memory/memory_cells/rram_neurosim_default.yaml +63 -0
- examples/arches/compute_in_memory/memory_cells/rram_raella_isca_2023.yaml +70 -0
- examples/arches/compute_in_memory/memory_cells/rram_wan_nature_2022.yaml +63 -0
- examples/arches/compute_in_memory/memory_cells/sram_colonnade_jssc_2021.yaml +63 -0
- examples/arches/compute_in_memory/memory_cells/sram_example.yaml +63 -0
- examples/arches/compute_in_memory/memory_cells/sram_jia_jssc_2020.yaml +63 -0
- examples/arches/compute_in_memory/memory_cells/sram_sinangil_jssc_2021.yaml +63 -0
- examples/arches/compute_in_memory/memory_cells/sram_wang_vlsi_2022.yaml +63 -0
- examples/arches/compute_in_memory/wang_vlsi_2022.yaml +289 -0
- examples/arches/eyeriss.yaml +68 -0
- examples/arches/fanout_variations/at_glb.yaml +31 -0
- examples/arches/fanout_variations/at_glb_with_fanout_node.yaml +34 -0
- examples/arches/fanout_variations/at_mac.yaml +31 -0
- examples/arches/fanout_variations/at_mac_with_constraints.yaml +38 -0
- examples/arches/fanout_variations/at_mac_with_fanout_node.yaml +34 -0
- examples/arches/nvdla.yaml +47 -0
- examples/arches/simple.yaml +28 -0
- examples/arches/tpu_v4i.yaml +67 -0
- examples/mappings/unfused_matmuls_to_simple.yaml +33 -0
- examples/misc/component_annotated.yaml +33 -0
- examples/workloads/gpt3_6.7B.yaml +124 -0
- examples/workloads/matmuls.yaml +20 -0
- examples/workloads/mobilenet_28.yaml +81 -0
- examples/workloads/mobilenet_various_separate.yaml +106 -0
- examples/workloads/three_matmuls_annotated.yaml +59 -0
- notebooks/.ipynb_checkpoints/fastfusion_arch_study_michael-checkpoint.ipynb +359 -0
- notebooks/compute_in_memory/_scripts.py +339 -0
- notebooks/compute_in_memory/isaac.guide.ipynb +270 -0
- notebooks/compute_in_memory/wang_vlsi_2022.ipynb +602 -0
- notebooks/paths.py +4 -0
- notebooks/tutorials/.ipynb_checkpoints/1_FFM-checkpoint.ipynb +3110 -0
- notebooks/tutorials/FFM.ipynb +3498 -0
- notebooks/tutorials/_include.py +48 -0
- notebooks/tutorials/component_energy_area.ipynb +363 -0
- tests/Q_mapping.yaml +38 -0
- tests/__init__.py +0 -0
- tests/conv.mapping.yaml +27 -0
- tests/conv.workload.yaml +13 -0
- tests/conv_sym.mapping.yaml +43 -0
- tests/copy.mapping.yaml +35 -0
- tests/copy.workload.yaml +15 -0
- tests/distribuffers/__init__.py +0 -0
- tests/distribuffers/multicast/test_cases.yaml +482 -0
- tests/distribuffers/spec/binding/valid_bindings.yaml +97 -0
- tests/distribuffers/spec/distributed.yaml +100 -0
- tests/distribuffers/spec/logical_arch.yaml +32 -0
- tests/distribuffers/spec/physical_arch.yaml +69 -0
- tests/distribuffers/test_binding.py +48 -0
- tests/frontend/__init__.py +0 -0
- tests/frontend/test_mapping_viz.py +52 -0
- tests/mapper/__init__.py +0 -0
- tests/mapper/configs/conv1d/conv1d.mapping.yaml +31 -0
- tests/mapper/configs/conv1d/conv1d.workload.yaml +11 -0
- tests/mapper/configs/two_conv1d/two_conv1d.expected.yaml +38 -0
- tests/mapper/configs/two_conv1d/two_conv1d.mapping.yaml +54 -0
- tests/mapper/configs/two_conv1d/two_conv1d.workload.yaml +19 -0
- tests/mapper/test_mapping_to_isl.py +90 -0
- tests/mapper/test_spatial_reuse_analysis.py +67 -0
- tests/mapper/test_temporal_reuse_analysis.py +56 -0
- tests/mapper/util.py +58 -0
- tests/matmul.mapping.yaml +29 -0
- tests/matmul.workload.yaml +12 -0
- tests/matmul_spatial.mapping.yaml +44 -0
- tests/mha.renames.yaml +65 -0
- tests/mha.workload.yaml +67 -0
- tests/mha.yaml +59 -0
- tests/mha_full.workload.yaml +67 -0
- tests/mobilenet.workload.yaml +35 -0
- tests/mobilenet_long.workload.yaml +64 -0
- tests/pmappingcache.py +24 -0
- tests/processing_stage.arch.yaml +40 -0
- tests/snowcat.arch.yaml +36 -0
- tests/test_ffm_join_pmappings.py +106 -0
- tests/test_ffm_make_pmappings.py +82 -0
- tests/test_ffm_make_tile_shapes.py +49 -0
- tests/test_mapper.py +100 -0
- tests/test_model.py +37 -0
- tests/test_plotting.py +72 -0
- tests/test_processing_stage.py +46 -0
- tests/test_symbolic_model.py +248 -0
- tests/test_workload.py +141 -0
|
@@ -0,0 +1,310 @@
|
|
|
1
|
+
import copy
|
|
2
|
+
from typing import Callable
|
|
3
|
+
from uuid import UUID
|
|
4
|
+
from accelforge.frontend import arch
|
|
5
|
+
from accelforge.frontend.spec import Spec
|
|
6
|
+
from accelforge.mapper.FFM._join_pmappings.pmapping_group import PmappingGroup
|
|
7
|
+
from accelforge.frontend.workload import EinsumName
|
|
8
|
+
from accelforge.frontend.mapping import Mapping
|
|
9
|
+
from accelforge.mapper.FFM._make_pmappings.pmapper_job import Job
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class MultiEinsumPmappings:
    """
    A collection of pmappings for each Einsum in a workload, generated by
    :func:`~accelforge.mapper.FFM.make_pmappings`.

    Parameters
    ----------
    einsum2pmappings:
        A dictionary of Einsum names to lists of PmappingGroups. PmappingGroups contain
        the Pareto-optimal pmappings for the Einsum.
    pmapping_objects:
        A dictionary of Einsum names to dictionaries of UUIDs to Mappings. The entries
        in the PmappingGroup objects reference these pmapping objects.
    einsum2jobs:
        A dictionary of Einsum names to lists of Jobs that generated the pmappings.
    can_combine_multiple_runs:
        Whether the pmappings can be combined from multiple runs. If this is True, then
        multiple make_pmappings calls can be summed together to get a single
        MultiEinsumPmappings object. If this is True, the mapper may run more slowly.
    einsums_with_pmappings_generated:
        Einsums for which pmappings were generated (or attempted to be generated).
    flattened_arches:
        A dictionary of (EinsumName, Compute Name) to lists of architecture nodes. These
        contain the parsed and flattened architecture node for that particular Einsum
        and compute combination.
    parsed_specs:
        A dictionary of Einsum names to parsed specifications. These contain the parsed
        specification for that particular Einsum.
    """

    def __init__(
        self,
        einsum2pmappings: dict[EinsumName, list[PmappingGroup]],
        pmapping_objects: dict[EinsumName, dict[UUID, Mapping]],
        einsum2jobs: dict[EinsumName, list[Job]],
        can_combine_multiple_runs: bool,
        einsums_with_pmappings_generated: set[EinsumName],
        flattened_arches: dict[tuple[EinsumName, str], list[arch.Leaf]],
        parsed_specs: dict[EinsumName, Spec],
    ):
        self.einsum2pmappings: dict[EinsumName, list[PmappingGroup]] = einsum2pmappings
        self.pmapping_objects: dict[EinsumName, dict[UUID, Mapping]] = pmapping_objects
        self.einsum2jobs: dict[EinsumName, list[Job]] = einsum2jobs
        self.can_combine_multiple_runs: bool = can_combine_multiple_runs
        self.einsums_with_pmappings_generated: set[EinsumName] = (
            einsums_with_pmappings_generated
        )
        # Keyed by (Einsum name, compute name) pairs. Previously annotated with
        # the invalid form dict[(EinsumName, str), ...]; tuple[...] is the
        # correct spelling and matches how keys are built elsewhere.
        self.flattened_arches: dict[tuple[EinsumName, str], list[arch.Leaf]] = (
            flattened_arches
        )
        self.parsed_specs: dict[EinsumName, Spec] = parsed_specs

    def __or__(self, other: "MultiEinsumPmappings") -> "MultiEinsumPmappings":
        """
        Combines the pmappings from two runs into a new MultiEinsumPmappings.

        Neither operand is modified. Both operands must have been created with
        ``can_combine_multiple_runs=True``.

        Raises
        ------
        ValueError
            If either operand was not created with can_combine_multiple_runs=True.
        """
        if not self.can_combine_multiple_runs or not other.can_combine_multiple_runs:
            raise ValueError(
                "Must call make_pmappings with can_combine_multiple_runs=True to combine pmappings "
                "from multiple runs."
            )
        combined = copy.copy(self)
        # BUG FIX: copy.copy is shallow, so extending/updating the copied
        # object's containers in place also mutated ``self``'s containers,
        # silently corrupting the left operand of ``a | b``. Rebuild fresh
        # containers on the combined object before merging ``other`` in.
        combined.einsum2pmappings = {
            einsum: list(groups) for einsum, groups in self.einsum2pmappings.items()
        }
        for einsum_name, pmappings in other.einsum2pmappings.items():
            combined.einsum2pmappings.setdefault(einsum_name, []).extend(pmappings)
        combined.einsum2jobs = {
            einsum: list(jobs) for einsum, jobs in self.einsum2jobs.items()
        }
        for einsum_name, jobs in other.einsum2jobs.items():
            combined.einsum2jobs.setdefault(einsum_name, []).extend(jobs)
        combined.pmapping_objects = {
            **self.pmapping_objects,
            **other.pmapping_objects,
        }
        combined.einsums_with_pmappings_generated = (
            self.einsums_with_pmappings_generated
            | other.einsums_with_pmappings_generated
        )
        combined.parsed_specs = {**self.parsed_specs, **other.parsed_specs}
        combined.flattened_arches = {**self.flattened_arches, **other.flattened_arches}
        return combined

    def _filter(
        self,
        filter_lambda: Callable[[PmappingGroup], bool],
        einsums_with_pmappings_generated: list[EinsumName] | None = None,
    ) -> "MultiEinsumPmappings":
        """
        Returns a new MultiEinsumPmappings keeping only the PmappingGroups for
        which ``filter_lambda`` returns True.

        Parameters
        ----------
        filter_lambda:
            Predicate applied to each PmappingGroup; groups for which it returns
            False are dropped.
        einsums_with_pmappings_generated:
            The Einsums to filter. If None, all Einsums are filtered.
        """
        new_einsum2pmappings = {}
        if einsums_with_pmappings_generated is None:
            einsums_with_pmappings_generated = list(self.einsum2pmappings.keys())
        for einsum_name in einsums_with_pmappings_generated:
            new_einsum2pmappings[einsum_name] = [
                pm for pm in self.einsum2pmappings[einsum_name] if filter_lambda(pm)
            ]

        # Other attributes are shared (not copied); callers must not mutate them.
        return MultiEinsumPmappings(
            einsum2pmappings=new_einsum2pmappings,
            pmapping_objects=self.pmapping_objects,
            einsum2jobs=self.einsum2jobs,
            can_combine_multiple_runs=self.can_combine_multiple_runs,
            einsums_with_pmappings_generated=self.einsums_with_pmappings_generated,
            flattened_arches=self.flattened_arches,
            parsed_specs=self.parsed_specs,
        )

    def drop_einsums(self, *einsums_with_pmappings_generated: EinsumName) -> None:
        """
        Removes all pmappings for the given Einsums. Mutates this object in place.

        Parameters
        ----------
        einsums_with_pmappings_generated:
            The Einsums for which to remove pmappings.
        """
        # NOTE(review): parsed_specs and flattened_arches entries for the
        # dropped Einsums are deliberately left in place here — confirm that
        # downstream consumers rely on them surviving a drop.
        for einsum_name in einsums_with_pmappings_generated:
            del self.einsum2pmappings[einsum_name]
            del self.pmapping_objects[einsum_name]
            del self.einsum2jobs[einsum_name]
            self.einsums_with_pmappings_generated.remove(einsum_name)

    def pmapping_keep_rates(
        self, per_einsum: bool = False
    ) -> dict[EinsumName, dict[str, float]] | dict[str, float]:
        """
        Returns the keep rates for each cause of pmapping removal. For example, if only
        25% of the pmappings have a valid spatial fanout, then the keep rate for the
        spatial fanout cause will be 0.25.

        Parameters
        ----------
        per_einsum:
            If True, returns a dictionary of keep rates for each Einsum.

        Returns
        -------
        A dictionary of keep rates for each cause of pmapping removal.
        """
        result = {}
        einsum2npmappings = self.n_total_pmappings(per_einsum=True)

        # First accumulate weighted counts (n_total_pmappings * keep_rate) per
        # cause; the division back into a rate happens below.
        for einsum_name, jobs in self.einsum2jobs.items():
            cur_result = result.setdefault(einsum_name, {})
            for job in jobs:
                for cause, keep_rate in job.pmapping_keep_rates.items():
                    cur_result.setdefault(cause, 0)
                    cur_result[cause] += job.n_total_pmappings * keep_rate

        if per_einsum:
            for einsum_name, npmappings in einsum2npmappings.items():
                for cause, keep_rate in result[einsum_name].items():
                    result[einsum_name][cause] = keep_rate / npmappings
        else:
            new_result = {}
            n_total_pmappings = sum(einsum2npmappings.values())
            for einsum_name, keep_rates in result.items():
                for cause, keep_rate in keep_rates.items():
                    new_result.setdefault(cause, 0)
                    new_result[cause] += keep_rate / n_total_pmappings
            result = new_result

        return result

    def n_total_pmappings(
        self, per_einsum: bool = False
    ) -> int | dict[EinsumName, int]:
        """
        Returns the number of total pmappings in the mapspace.

        Parameters
        ----------
        per_einsum:
            If True, returns a dictionary of total pmappings for each Einsum.

        Returns
        -------
        The number of total pmappings in the mapspace.
        """
        result = {
            einsum_name: sum(job.n_total_pmappings for job in jobs)
            for einsum_name, jobs in self.einsum2jobs.items()
        }
        if per_einsum:
            return result
        return sum(result.values())

    def n_valid_pmappings(
        self, per_einsum: bool = False
    ) -> int | dict[EinsumName, int]:
        """
        Returns the number of valid pmappings for each Einsum. A valid pmapping is one
        that satisfies all constraints and resource usage limits.

        Parameters
        ----------
        per_einsum:
            If True, returns a dictionary of valid pmappings for each Einsum.

        Returns
        -------
        The number of valid pmappings in the mapspace.
        """
        result = {
            einsum_name: sum(job.n_valid_pmappings for job in jobs)
            for einsum_name, jobs in self.einsum2jobs.items()
        }
        if per_einsum:
            return result
        return sum(result.values())

    def n_pareto_optimal_pmappings(
        self, per_einsum: bool = False
    ) -> int | dict[EinsumName, int]:
        """
        Returns the number of Pareto-optimal pmappings for each Einsum. This is the
        number of mappings that will be returned by the make_pmappings function.

        Parameters
        ----------
        per_einsum:
            If True, returns a dictionary of Pareto-optimal pmappings for each Einsum.

        Returns
        -------
        The number of Pareto-optimal pmappings in the mapspace.
        """
        result = {
            einsum_name: sum(len(p) for p in pmappings)
            for einsum_name, pmappings in self.einsum2pmappings.items()
        }
        if per_einsum:
            return result
        return sum(result.values())

    def n_evaluated_pmappings(
        self, per_einsum: bool = False
    ) -> int | dict[EinsumName, int]:
        """
        Returns the number of pmappings that were evaluated for each Einsum. This is
        greater than the number of Pareto-optimal pmappings because some mappings are
        found to be suboptimal after they have been evaluated.

        Parameters
        ----------
        per_einsum:
            If True, returns a dictionary of evaluated pmappings for each Einsum.

        Returns
        -------
        The number of evaluated pmappings in the mapspace.
        """
        result = {
            einsum_name: sum(job.n_evaluated_pmappings for job in jobs)
            for einsum_name, jobs in self.einsum2jobs.items()
        }
        if per_einsum:
            return result
        return sum(result.values())

    def n_pmapping_string(self) -> str:
        """
        Returns a string representation of the number of pmappings in the mapspace.
        Printing this can help diagnose if the mapper is not finding any pmappings or
        mappings.

        Returns
        -------
        A string representation of the number of pmappings in the mapspace.
        """
        # "Total" is used as a synthetic row key below, so it cannot also be a
        # real Einsum name.
        if "Total" in self.einsum2pmappings:
            raise ValueError(
                f"Cannot print stats for a MultiEinsumPmappings object that has "
                f"an Einsum named 'Total'. Use a different name for the Einsum."
            )

        n_total_pmappings = self.n_total_pmappings(per_einsum=True)
        n_valid_pmappings = self.n_valid_pmappings(per_einsum=True)
        n_evaluated_pmappings = self.n_evaluated_pmappings(per_einsum=True)
        n_pareto_optimal_pmappings = self.n_pareto_optimal_pmappings(per_einsum=True)

        for x in (
            n_total_pmappings,
            n_valid_pmappings,
            n_evaluated_pmappings,
            n_pareto_optimal_pmappings,
        ):
            x["Total"] = sum(x.values())

        s = []
        for e in n_total_pmappings:
            t = n_total_pmappings[e]
            v = n_valid_pmappings[e]
            ev = n_evaluated_pmappings[e]
            p = n_pareto_optimal_pmappings[e]

            def fmt(x, total: bool = True):
                # Formats a count; when ``total`` is True, appends "(1/R)" where
                # R is how many total pmappings correspond to one counted here.
                x = round(x)

                def _f(y):
                    y = round(y)
                    return str(y) if y < 1000 else f"{y:.2e}".replace("e+", "e")

                divved = _f(round(t) / x) if x != 0 else "inf"
                return f"{_f(x)} (1/{divved})" if total else _f(x)

            s.append(
                f"{e}: {fmt(t, False)} total, {fmt(v)} valid, {fmt(ev)} evaluated, "
                f"{fmt(p)} Pareto-Optimal"
            )
        return "\n".join(s)
|
accelforge/mapper.py
ADDED
|
File without changes
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
from accelforge.model.main import evaluate_mapping
|
|
File without changes
|
|
@@ -0,0 +1,335 @@
|
|
|
1
|
+
from dataclasses import dataclass
|
|
2
|
+
from typing import Optional, overload
|
|
3
|
+
|
|
4
|
+
# from bindings.looptree import TemporalTag, SequentialTag, PipelineTemporalTag
|
|
5
|
+
|
|
6
|
+
import islpy as isl
|
|
7
|
+
|
|
8
|
+
from accelforge.model._looptree.reuse.isl import IslReuseAnalysisOutput
|
|
9
|
+
from accelforge.model._looptree.reuse.symbolic import (
|
|
10
|
+
BuffetStats,
|
|
11
|
+
SymbolicAnalysisOutput,
|
|
12
|
+
)
|
|
13
|
+
from accelforge.model._looptree.mapping_utilities import get_paths, get_leaves
|
|
14
|
+
|
|
15
|
+
from accelforge.frontend.mapping import Mapping, TensorHolder, Compute
|
|
16
|
+
from accelforge.frontend.workload import Workload
|
|
17
|
+
|
|
18
|
+
# from pytimeloop._isl.singular import get_sum_of_pw_qpolynomial
|
|
19
|
+
# from pytimeloop._isl.sum import sum_with_mask
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
@dataclass(eq=True)
class Accesses:
    """Access counts for one (buffer, dataspace, einsum) combination."""

    # Reads/writes summed over all units. Floats because they come from
    # polynomial/symbolic sums rather than integer counters.
    total_reads: float
    total_writes: float
    # Largest read/write counts attributed to a single unit — presumably one
    # spatial instance of the buffer; TODO confirm against the analysis code.
    max_per_unit_reads: float
    max_per_unit_writes: float
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
class BufferAccesses:
    """A mutable table of Accesses keyed by (buffer, dataspace, einsum)."""

    def __init__(self):
        # Maps (buffer, dspace, einsum) -> Accesses.
        self.accesses: dict[tuple, Accesses] = {}

    def get_accesses(self, buffer, dspace, einsum) -> Accesses:
        """Return the entry for (buffer, dspace, einsum), creating a zeroed one
        on first access so callers can accumulate into it directly."""
        return self.accesses.setdefault((buffer, dspace, einsum), Accesses(0, 0, 0, 0))

    def items(self):
        """Iterate over ((buffer, dspace, einsum), Accesses) pairs."""
        return self.accesses.items()

    def items_with_buffer(self, ref_buffer):
        """Returns iterator similar to `items` but only for `ref_buffer`"""
        for key, value in self.accesses.items():
            if key[0] == ref_buffer:
                yield key, value

    def __str__(self):
        return repr(self.accesses)

    def __repr__(self):
        return f"BufferAccesses({repr(self.accesses)})"
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
@overload
def summarize_total_and_per_unit_actions(
    reuse_analysis_result: IslReuseAnalysisOutput,
) -> dict[tuple, BuffetStats]:
    pass


@overload
def summarize_total_and_per_unit_actions(
    reuse_analysis_result: SymbolicAnalysisOutput,
) -> dict[tuple, BuffetStats]:
    pass


def summarize_total_and_per_unit_actions(
    reuse_analysis_result,
) -> dict[tuple, BuffetStats]:
    """
    Summarizes a reuse-analysis result into per-buffet statistics.

    Parameters
    ----------
    reuse_analysis_result:
        Either an IslReuseAnalysisOutput or a SymbolicAnalysisOutput.

    Returns
    -------
    A dictionary mapping (level, tensor, einsum) keys to BuffetStats with
    total and max-per-unit fill/read counts. Inputs of any other type yield
    an empty dictionary.
    """
    result = {}
    if isinstance(reuse_analysis_result, IslReuseAnalysisOutput):
        # NOTE(review): get_sum_of_pw_qpolynomial's import is commented out at
        # the top of this file, so this branch would raise NameError if taken —
        # confirm whether the ISL path is still supported.
        reads_to_parent = reuse_analysis_result.reads_to_parent
        reads_to_peer = reuse_analysis_result.reads_to_peer
        for key, (tags, fill) in reuse_analysis_result.fills.items():
            read_to_parent = reads_to_parent[key][1]
            read_to_peer = reads_to_peer[key][1]

            total_fill = get_sum_of_pw_qpolynomial(fill)
            total_read_to_parent = get_sum_of_pw_qpolynomial(read_to_parent)
            total_read_to_peer = get_sum_of_pw_qpolynomial(read_to_peer)

            max_per_unit_fill = _sum_over_temporal_max_over_spatial(tags, fill)

            # reads_to_parent may have fewer input dims than fills; only the
            # matching prefix of tags applies.
            n_read_to_parent_dim = read_to_parent.dim(isl.dim_type.in_)
            max_per_unit_read_to_parent = _sum_over_temporal_max_over_spatial(
                tags[:n_read_to_parent_dim], read_to_parent
            )

            max_per_unit_read_to_peer = _sum_over_temporal_max_over_spatial(
                tags, read_to_peer
            )

            buffet_stats = BuffetStats(
                total_fills=total_fill,
                total_reads_to_parent=total_read_to_parent,
                total_reads_to_peer=total_read_to_peer,
                max_per_unit_fills=max_per_unit_fill,
                max_per_parent_reads_to_parent=max_per_unit_read_to_parent,
                max_per_unit_reads_to_peer=max_per_unit_read_to_peer,
            )
            # BUG FIX: buffet_stats was constructed but never stored, so the
            # ISL branch always returned an empty dict.
            result[key] = buffet_stats

    elif isinstance(reuse_analysis_result, SymbolicAnalysisOutput):
        # Symbolic analysis already produces BuffetStats; just re-key them by
        # (level, tensor, einsum) to match the ISL branch's key shape.
        for buffet, buffet_stats in reuse_analysis_result.buffet_stats.items():
            level = buffet.level
            einsum = buffet.einsum
            key = (level, buffet.tensor, einsum)
            result[key] = buffet_stats

    return result
|
|
115
|
+
|
|
116
|
+
|
|
117
|
+
@overload
def isl_buffer_accesses_from_buffet_actions(
    reuse_analysis_result: IslReuseAnalysisOutput, mapping, workload, is_path=False
) -> BufferAccesses:
    # Typing-only overload: ISL-based reuse-analysis input. Never executed.
    pass
|
|
122
|
+
|
|
123
|
+
|
|
124
|
+
@overload
def isl_buffer_accesses_from_buffet_actions(
    reuse_analysis_result: SymbolicAnalysisOutput, mapping, workload, is_path=False
) -> BufferAccesses:
    # Typing-only overload: symbolic reuse-analysis input. Never executed.
    pass
|
|
129
|
+
|
|
130
|
+
|
|
131
|
+
# TODO: is_path should be removed and we should accept only regular mappings
def isl_buffer_accesses_from_buffet_actions(
    reuse_analysis_result, mapping, workload: Workload, is_path=False
) -> BufferAccesses:
    """Translate per-buffet action counts into per-buffer read/write accesses.

    Args:
        reuse_analysis_result: ``IslReuseAnalysisOutput`` or
            ``SymbolicAnalysisOutput``; collapsed via
            ``summarize_total_and_per_unit_actions``.
        mapping: Mapping object; only its ``.nodes`` attribute is used.
        workload: Workload describing each einsum's input/output tensors.
        is_path: When True, ``mapping.nodes`` is treated as a single
            root-to-leaf path instead of a full mapping tree.

    Returns:
        ``BufferAccesses`` with total and max-per-unit reads/writes
        accumulated for every (buffer, tensor, einsum) triple.

    Raises:
        ValueError: If the analysis reports per-unit reads to a parent
            without any total reads to that parent (inconsistent input).
    """
    mapping = mapping.nodes

    parent_buffers = get_parent_buffers(mapping, workload, is_path)

    # Compute components have no write action, so fills targeting them are
    # skipped in the fill-accounting pass below.
    compute_targets = set()
    for compute_node in get_leaves(mapping, is_path):
        assert isinstance(compute_node, Compute)
        compute_targets.add(compute_node.component)

    summarized_actions = summarize_total_and_per_unit_actions(reuse_analysis_result)

    accesses_results = BufferAccesses()
    for (buffer_id, tensor, einsum), stats in summarized_actions.items():
        fill = stats.total_fills  # Writes
        read_to_parent = stats.total_reads_to_parent  # Reads to parent
        read_to_peer = stats.total_reads_to_peer  # Reads to peer
        max_per_unit_fill = stats.max_per_unit_fills
        max_per_parent_read_to_parent = stats.max_per_parent_reads_to_parent
        max_per_unit_read_to_peer = stats.max_per_unit_reads_to_peer

        parent_buffer = parent_buffers[(buffer_id, tensor, einsum)]
        if parent_buffer is not None:
            # The backing buffer is the topmost holder of the tensor, i.e. the
            # one whose own parent entry is None.
            parent_is_backing = parent_buffers[(parent_buffer, tensor, einsum)] is None

            accesses = accesses_results.get_accesses(parent_buffer, tensor, einsum)
            if tensor in workload.einsums[einsum].output_tensor_names:
                # Outputs are read-modify-write at the parent: every read back
                # implies a matching write of the updated partial result.
                accesses.total_writes += read_to_parent
                accesses.total_reads += read_to_parent

                accesses.max_per_unit_reads += max_per_parent_read_to_parent
                accesses.max_per_unit_writes += max_per_parent_read_to_parent

                if read_to_parent == 0 and max_per_parent_read_to_parent != 0:
                    raise ValueError(
                        f"read_to_parent is 0 but max_per_parent_read_to_parent is {max_per_parent_read_to_parent}"
                    )
                # BUG FIX: the guard previously also required
                # max_per_unit_read_to_peer == 0, so read_to_parent == 0 with a
                # nonzero peer-read count fell into the division and raised
                # ZeroDivisionError. The ValueError above already guarantees
                # max_per_parent_read_to_parent == 0 whenever
                # read_to_parent == 0, so the ratio is simply 0.
                if read_to_parent == 0:
                    per_unit_to_total = 0
                else:
                    per_unit_to_total = max_per_parent_read_to_parent / read_to_parent

                # TODO: Do this per unit properly by recursing on first iteration in symbolic.py
                # and passing a flag that says whether this is first iteration
                if parent_is_backing:
                    # First-iteration reads of an output at the backing buffer
                    # can be elided (there is no partial result to read yet).
                    elidable_reads = reuse_analysis_result.elidable_reads.get(tensor, 0)
                    accesses.total_reads -= elidable_reads
                    accesses.max_per_unit_reads -= per_unit_to_total * elidable_reads

            elif tensor in workload.einsums[einsum].input_tensor_names:
                accesses.total_reads += read_to_parent

                accesses.max_per_unit_reads += max_per_parent_read_to_parent

        # Fills will write into current buffer except for compute (which does
        # not have write action) and top-level buffer
        if buffer_id not in compute_targets and parent_buffer is not None:
            accesses = accesses_results.get_accesses(buffer_id, tensor, einsum)
            if tensor in workload.einsums[einsum].output_tensor_names:
                accesses.total_writes += fill
                accesses.max_per_unit_writes += max_per_unit_fill

                # # TODO: figure out how to do this per unit
                # total_elided_writes = get_tensor_size(workload, tensor)
                # accesses.total_writes -= total_elided_writes
            else:
                accesses.total_writes += fill
                accesses.max_per_unit_writes += max_per_unit_fill

            accesses.total_reads += read_to_peer
            accesses.max_per_unit_reads += max_per_unit_read_to_peer

    return accesses_results
|
|
207
|
+
|
|
208
|
+
|
|
209
|
+
def get_parent_buffers(mapping: Mapping, workload: Workload, is_path):
    """Map every (component, tensor, einsum) to the next-higher tensor holder.

    Walks each root-to-leaf path of the mapping. Along a path, the first
    holder of a tensor gets parent ``None`` (it is the backing buffer);
    every later holder — and the compute leaf for its einsum's input and
    output tensors — gets the closest holder seen above it.
    """
    parent_buffers = {}
    paths = [mapping] if is_path else get_paths(mapping)

    for path in paths:
        einsum = path[-1].einsum
        # tensor -> closest holder component encountered so far on this path
        nearest_holder = {}

        for node in path:
            if isinstance(node, TensorHolder):
                for tensor in node.tensors:
                    # .get() yields None for a first (backing) holder.
                    parent_buffers[(node.component, tensor, einsum)] = (
                        nearest_holder.get(tensor)
                    )
                    nearest_holder[tensor] = node.component
            elif isinstance(node, Compute):
                spec = workload.einsums[einsum]
                for tensor_group in (spec.input_tensor_names, spec.output_tensor_names):
                    for tensor in tensor_group:
                        if tensor in nearest_holder:
                            parent_buffers[(node.component, tensor, einsum)] = (
                                nearest_holder[tensor]
                            )

    return parent_buffers
|
|
241
|
+
|
|
242
|
+
|
|
243
|
+
def _sum_over_temporal_max_over_spatial(tags, actions):
    """Sum ``actions`` over temporal dimensions, then max over spatial ones.

    A dimension counts as temporal when its tag is a TemporalTag,
    PipelineTemporalTag, or SequentialTag; all other dimensions are treated
    as spatial and reduced with max. Returns a plain Python number.
    """
    temporal_mask = [
        isinstance(tag, (TemporalTag, PipelineTemporalTag, SequentialTag))
        for tag in tags
    ]
    summed = sum_with_mask(temporal_mask, actions)
    return summed.max().to_python()
|
|
259
|
+
|
|
260
|
+
|
|
261
|
+
# NOTE(review): this definition is byte-for-byte identical to one that appears
# earlier in this module; being later, it is the one bound at import time.
# Consider deleting one of the two copies.
def isl_buffer_accesses_from_buffet_actions(
    reuse_analysis_result, mapping, workload: Workload, is_path=False
) -> BufferAccesses:
    """Translate per-buffet action counts into per-buffer read/write accesses.

    Args:
        reuse_analysis_result: ``IslReuseAnalysisOutput`` or
            ``SymbolicAnalysisOutput``; collapsed via
            ``summarize_total_and_per_unit_actions``.
        mapping: Mapping object; only its ``.nodes`` attribute is used.
        workload: Workload describing each einsum's input/output tensors.
        is_path: When True, ``mapping.nodes`` is treated as a single
            root-to-leaf path instead of a full mapping tree.

    Returns:
        ``BufferAccesses`` with total and max-per-unit reads/writes
        accumulated for every (buffer, tensor, einsum) triple.

    Raises:
        ValueError: If the analysis reports per-unit reads to a parent
            without any total reads to that parent (inconsistent input).
    """
    mapping = mapping.nodes

    parent_buffers = get_parent_buffers(mapping, workload, is_path)

    # Compute components have no write action, so fills targeting them are
    # skipped in the fill-accounting pass below.
    compute_targets = set()
    for compute_node in get_leaves(mapping, is_path):
        assert isinstance(compute_node, Compute)
        compute_targets.add(compute_node.component)

    summarized_actions = summarize_total_and_per_unit_actions(reuse_analysis_result)

    accesses_results = BufferAccesses()
    for (buffer_id, tensor, einsum), stats in summarized_actions.items():
        fill = stats.total_fills  # Writes
        read_to_parent = stats.total_reads_to_parent  # Reads to parent
        read_to_peer = stats.total_reads_to_peer  # Reads to peer
        max_per_unit_fill = stats.max_per_unit_fills
        max_per_parent_read_to_parent = stats.max_per_parent_reads_to_parent
        max_per_unit_read_to_peer = stats.max_per_unit_reads_to_peer

        parent_buffer = parent_buffers[(buffer_id, tensor, einsum)]
        if parent_buffer is not None:
            # The backing buffer is the topmost holder of the tensor, i.e. the
            # one whose own parent entry is None.
            parent_is_backing = parent_buffers[(parent_buffer, tensor, einsum)] is None

            accesses = accesses_results.get_accesses(parent_buffer, tensor, einsum)
            if tensor in workload.einsums[einsum].output_tensor_names:
                # Outputs are read-modify-write at the parent: every read back
                # implies a matching write of the updated partial result.
                accesses.total_writes += read_to_parent
                accesses.total_reads += read_to_parent

                accesses.max_per_unit_reads += max_per_parent_read_to_parent
                accesses.max_per_unit_writes += max_per_parent_read_to_parent

                if read_to_parent == 0 and max_per_parent_read_to_parent != 0:
                    raise ValueError(
                        f"read_to_parent is 0 but max_per_parent_read_to_parent is {max_per_parent_read_to_parent}"
                    )
                # BUG FIX: the guard previously also required
                # max_per_unit_read_to_peer == 0, so read_to_parent == 0 with a
                # nonzero peer-read count fell into the division and raised
                # ZeroDivisionError. The ValueError above already guarantees
                # max_per_parent_read_to_parent == 0 whenever
                # read_to_parent == 0, so the ratio is simply 0.
                if read_to_parent == 0:
                    per_unit_to_total = 0
                else:
                    per_unit_to_total = max_per_parent_read_to_parent / read_to_parent

                # TODO: Do this per unit properly by recursing on first iteration in symbolic.py
                # and passing a flag that says whether this is first iteration
                if parent_is_backing:
                    # First-iteration reads of an output at the backing buffer
                    # can be elided (there is no partial result to read yet).
                    elidable_reads = reuse_analysis_result.elidable_reads.get(tensor, 0)
                    accesses.total_reads -= elidable_reads
                    accesses.max_per_unit_reads -= per_unit_to_total * elidable_reads

            elif tensor in workload.einsums[einsum].input_tensor_names:
                accesses.total_reads += read_to_parent

                accesses.max_per_unit_reads += max_per_parent_read_to_parent

        # Fills will write into current buffer except for compute (which does
        # not have write action) and top-level buffer
        if buffer_id not in compute_targets and parent_buffer is not None:
            accesses = accesses_results.get_accesses(buffer_id, tensor, einsum)
            if tensor in workload.einsums[einsum].output_tensor_names:
                accesses.total_writes += fill
                accesses.max_per_unit_writes += max_per_unit_fill

                # # TODO: figure out how to do this per unit
                # total_elided_writes = get_tensor_size(workload, tensor)
                # accesses.total_writes -= total_elided_writes
            else:
                accesses.total_writes += fill
                accesses.max_per_unit_writes += max_per_unit_fill

            accesses.total_reads += read_to_peer
            accesses.max_per_unit_reads += max_per_unit_read_to_peer

    return accesses_results
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
from .capacity import compute_capacity_usage
|
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
def compute_max(child_caps, caps):
    """Add the worst-case (max over children) capacity usage into ``caps``.

    Each entry of ``child_caps`` maps buffer -> usage for one child. For
    every buffer, the maximum usage any single child demands is accumulated
    into ``caps`` in place. Returns None.
    """
    worst_case = {}
    for child_cap in child_caps:
        # Keys within one child's dict are unique, so each child's usage per
        # buffer is just its value; fold it straight into the running max.
        for buf, usage in child_cap.items():
            worst_case[buf] = max(worst_case.get(buf, 0), usage)

    for buf, usage in worst_case.items():
        caps[buf] = caps.get(buf, 0) + usage
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def compute_total(child_caps, caps):
    """Sum every child's capacity usage into ``caps`` in place.

    Each entry of ``child_caps`` maps buffer -> usage for one child; all
    children's usages are added together per buffer. Returns None.
    """
    for child_cap in child_caps:
        for buf, usage in child_cap.items():
            caps[buf] = caps.get(buf, 0) + usage
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
# Dispatch table: mapping-node kind -> in-place aggregator that folds the
# children's per-buffer capacity dicts into the parent's dict.
# "sequential" takes the max across children, while "pipeline" and "parallel"
# sum them — presumably because sequential children time-share a buffer while
# pipelined/parallel children occupy it concurrently (NOTE(review): confirm).
CAPACITY_AGGREGATORS = {
    "sequential": compute_max,
    "pipeline": compute_total,
    "parallel": compute_total,
}
|