accelforge 0.0.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- accelforge/__init__.py +21 -0
- accelforge/_accelerated_imports.py +16 -0
- accelforge/_deprecate/_simanneal/evalmapping.py +271 -0
- accelforge/_deprecate/_simanneal/mapspaceglobals.py +298 -0
- accelforge/_deprecate/_simanneal/simanneal.py +666 -0
- accelforge/_deprecate/_simanneal/tracking.py +105 -0
- accelforge/_deprecate/_simanneal/wrappers.py +218 -0
- accelforge/_deprecate/_simanneal2/__init__.py +7 -0
- accelforge/_deprecate/_simanneal2/simanneal.py +493 -0
- accelforge/_deprecate/_simanneal2/tracking.py +116 -0
- accelforge/_deprecate/compatibility_util.py +181 -0
- accelforge/_deprecate/layerdeduplication/__init__.py +2 -0
- accelforge/_deprecate/layerdeduplication/group_similar_einsums.py +160 -0
- accelforge/_deprecate/layerdeduplication/grouped_einsums.py +84 -0
- accelforge/_deprecate/mapping_filter_tags/__init__.py +2 -0
- accelforge/_deprecate/mapping_filter_tags/ffmt.py +212 -0
- accelforge/_deprecate/mapping_filter_tags/onesplit.py +24 -0
- accelforge/_deprecate/mapping_filter_tags/util.py +24 -0
- accelforge/_deprecate/tags.py +69 -0
- accelforge/_deprecate/viz/__init__.py +0 -0
- accelforge/_deprecate/viz/interactive.py +159 -0
- accelforge/_deprecate/viz/reservationtree.py +307 -0
- accelforge/_deprecate/viz/ski_slope.py +88 -0
- accelforge/_version.py +15 -0
- accelforge/examples.py +39 -0
- accelforge/frontend/__init__.py +10 -0
- accelforge/frontend/_binding.py +129 -0
- accelforge/frontend/_workload_isl/__init__.py +2 -0
- accelforge/frontend/_workload_isl/_isl.py +149 -0
- accelforge/frontend/_workload_isl/_symbolic.py +141 -0
- accelforge/frontend/arch copy.py +1544 -0
- accelforge/frontend/arch.py +1642 -0
- accelforge/frontend/config.py +63 -0
- accelforge/frontend/mapper/__init__.py +5 -0
- accelforge/frontend/mapper/ffm.py +126 -0
- accelforge/frontend/mapper/mapper.py +7 -0
- accelforge/frontend/mapper/metrics.py +30 -0
- accelforge/frontend/mapping/__init__.py +1 -0
- accelforge/frontend/mapping/mapping.py +1736 -0
- accelforge/frontend/model.py +14 -0
- accelforge/frontend/renames.py +150 -0
- accelforge/frontend/spec copy.py +230 -0
- accelforge/frontend/spec.py +301 -0
- accelforge/frontend/variables.py +12 -0
- accelforge/frontend/workload.py +952 -0
- accelforge/mapper/FFM/__init__.py +9 -0
- accelforge/mapper/FFM/_join_pmappings/__init__.py +0 -0
- accelforge/mapper/FFM/_join_pmappings/compatibility.py +653 -0
- accelforge/mapper/FFM/_join_pmappings/compress_pmappings.py +140 -0
- accelforge/mapper/FFM/_join_pmappings/join_pmappings.py +703 -0
- accelforge/mapper/FFM/_join_pmappings/pmapping_dataframe.py +901 -0
- accelforge/mapper/FFM/_join_pmappings/pmapping_group.py +337 -0
- accelforge/mapper/FFM/_make_pmappings/contraints/__init__.py +0 -0
- accelforge/mapper/FFM/_make_pmappings/contraints/constraints.py +360 -0
- accelforge/mapper/FFM/_make_pmappings/make_pmapping_templates/__init__.py +1 -0
- accelforge/mapper/FFM/_make_pmappings/make_pmapping_templates/make_loops.py +373 -0
- accelforge/mapper/FFM/_make_pmappings/make_pmapping_templates/make_pmapping_templates.py +463 -0
- accelforge/mapper/FFM/_make_pmappings/make_pmapping_templates/make_reservations.py +95 -0
- accelforge/mapper/FFM/_make_pmappings/make_pmapping_templates/make_storage_order.py +382 -0
- accelforge/mapper/FFM/_make_pmappings/make_pmapping_templates/make_storages.py +155 -0
- accelforge/mapper/FFM/_make_pmappings/make_pmappings.py +411 -0
- accelforge/mapper/FFM/_make_pmappings/make_pmappings_from_templates/__init__.py +1 -0
- accelforge/mapper/FFM/_make_pmappings/make_pmappings_from_templates/make_pmappings_from_templates.py +407 -0
- accelforge/mapper/FFM/_make_pmappings/make_pmappings_from_templates/make_tile_shapes.py +1681 -0
- accelforge/mapper/FFM/_make_pmappings/make_pmappings_from_templates/run_model.py +170 -0
- accelforge/mapper/FFM/_make_pmappings/make_pmappings_from_templates/symbol_relations.py +174 -0
- accelforge/mapper/FFM/_make_pmappings/pmapper_job.py +282 -0
- accelforge/mapper/FFM/_pareto_df/df_convention.py +273 -0
- accelforge/mapper/FFM/_pareto_df/pareto copy.py +836 -0
- accelforge/mapper/FFM/_pareto_df/pareto.py +508 -0
- accelforge/mapper/FFM/data.py +61 -0
- accelforge/mapper/FFM/main copy.py +236 -0
- accelforge/mapper/FFM/main.py +208 -0
- accelforge/mapper/FFM/mappings.py +510 -0
- accelforge/mapper/FFM/pmappings.py +310 -0
- accelforge/mapper/__init__.py +4 -0
- accelforge/mapper.py +0 -0
- accelforge/model/__init__.py +1 -0
- accelforge/model/_looptree/__init__.py +0 -0
- accelforge/model/_looptree/accesses.py +335 -0
- accelforge/model/_looptree/capacity/__init__.py +1 -0
- accelforge/model/_looptree/capacity/aggregators.py +36 -0
- accelforge/model/_looptree/capacity/capacity.py +47 -0
- accelforge/model/_looptree/energy.py +150 -0
- accelforge/model/_looptree/equivalent_ranks.py +29 -0
- accelforge/model/_looptree/latency/__init__.py +1 -0
- accelforge/model/_looptree/latency/latency.py +98 -0
- accelforge/model/_looptree/latency/memory.py +120 -0
- accelforge/model/_looptree/latency/processors.py +92 -0
- accelforge/model/_looptree/mapping_utilities.py +71 -0
- accelforge/model/_looptree/reuse/__init__.py +4 -0
- accelforge/model/_looptree/reuse/isl/__init__.py +1 -0
- accelforge/model/_looptree/reuse/isl/des.py +59 -0
- accelforge/model/_looptree/reuse/isl/isl_functions.py +374 -0
- accelforge/model/_looptree/reuse/isl/mapping_to_isl/__init__.py +4 -0
- accelforge/model/_looptree/reuse/isl/mapping_to_isl/analyze_mapping.py +297 -0
- accelforge/model/_looptree/reuse/isl/mapping_to_isl/skews_from_mapping.py +236 -0
- accelforge/model/_looptree/reuse/isl/mapping_to_isl/tiling.py +685 -0
- accelforge/model/_looptree/reuse/isl/mapping_to_isl/types.py +188 -0
- accelforge/model/_looptree/reuse/isl/spatial.py +260 -0
- accelforge/model/_looptree/reuse/isl/temporal.py +182 -0
- accelforge/model/_looptree/reuse/symbolic/__init__.py +1 -0
- accelforge/model/_looptree/reuse/symbolic/symbolic copy 2.py +1346 -0
- accelforge/model/_looptree/reuse/symbolic/symbolic copy.py +1408 -0
- accelforge/model/_looptree/reuse/symbolic/symbolic.py +1396 -0
- accelforge/model/_looptree/run.py +122 -0
- accelforge/model/_looptree/types.py +26 -0
- accelforge/model/_looptree/visualization/__init__.py +0 -0
- accelforge/model/_looptree/visualization/occupancy.py +11 -0
- accelforge/model/main.py +222 -0
- accelforge/plotting/__init__.py +2 -0
- accelforge/plotting/mappings.py +219 -0
- accelforge/plotting/specs.py +57 -0
- accelforge/util/__init__.py +4 -0
- accelforge/util/_base_analysis_types.py +24 -0
- accelforge/util/_basetypes.py +1089 -0
- accelforge/util/_frozenset.py +36 -0
- accelforge/util/_isl.py +29 -0
- accelforge/util/_itertools.py +14 -0
- accelforge/util/_mathfuncs.py +57 -0
- accelforge/util/_parse_expressions.py +339 -0
- accelforge/util/_picklecache.py +32 -0
- accelforge/util/_setexpressions.py +268 -0
- accelforge/util/_sympy/__init__.py +0 -0
- accelforge/util/_sympy/broadcast_max.py +18 -0
- accelforge/util/_visualization.py +112 -0
- accelforge/util/_yaml.py +579 -0
- accelforge/util/parallel.py +193 -0
- accelforge-0.0.1.dist-info/METADATA +64 -0
- accelforge-0.0.1.dist-info/RECORD +258 -0
- accelforge-0.0.1.dist-info/WHEEL +5 -0
- accelforge-0.0.1.dist-info/licenses/LICENSE +19 -0
- accelforge-0.0.1.dist-info/top_level.txt +5 -0
- docs/_build/html/_sources/fastfusion.frontend.mapper.rst.txt +37 -0
- docs/_build/html/_sources/fastfusion.frontend.rst.txt +70 -0
- docs/_build/html/_sources/fastfusion.frontend.workload.rst.txt +21 -0
- docs/_build/html/_sources/fastfusion.mapper.FFM.rst.txt +37 -0
- docs/_build/html/_sources/fastfusion.mapper.rst.txt +18 -0
- docs/_build/html/_sources/fastfusion.rst.txt +20 -0
- docs/_build/html/_sources/fastfusion.util.rst.txt +21 -0
- docs/_build/html/_sources/index.rst.txt +87 -0
- docs/_build/html/_sources/modules.rst.txt +7 -0
- docs/_build/html/_sources/notes/citation.rst.txt +45 -0
- docs/_build/html/_sources/notes/definitions.rst.txt +43 -0
- docs/_build/html/_sources/notes/faqs.rst.txt +39 -0
- docs/_build/html/_sources/notes/modeling/accelerator_energy_latency.rst.txt +72 -0
- docs/_build/html/_sources/notes/modeling/component_energy_area.rst.txt +96 -0
- docs/_build/html/_sources/notes/modeling/mapping.rst.txt +100 -0
- docs/_build/html/_sources/notes/modeling.rst.txt +33 -0
- docs/_build/html/_sources/notes/parsing/arithmetic_parsing.rst.txt +136 -0
- docs/_build/html/_sources/notes/parsing/setexpressions.rst.txt +63 -0
- docs/_build/html/_sources/notes/parsing/yaml_parsing.rst.txt +176 -0
- docs/_build/html/_sources/notes/quickstart_and_installation.rst.txt +9 -0
- docs/_build/html/_sources/notes/spec/architecture.rst.txt +133 -0
- docs/_build/html/_sources/notes/spec/mapping.rst.txt +12 -0
- docs/_build/html/_sources/notes/spec/workload.rst.txt +83 -0
- docs/_build/html/_sources/notes/spec.rst.txt +36 -0
- docs/source/_ext/include_attrs.py +213 -0
- docs/source/_ext/include_docstring.py +364 -0
- docs/source/_ext/include_functions.py +154 -0
- docs/source/_ext/include_notebook.py +131 -0
- docs/source/_ext/include_yaml.py +119 -0
- docs/source/_ext/inherited_attributes.py +222 -0
- docs/source/_ext/paths.py +4 -0
- docs/source/conf.py +79 -0
- examples/arches/compute_in_memory/_include.yaml +74 -0
- examples/arches/compute_in_memory/_include_functions.py +229 -0
- examples/arches/compute_in_memory/_load_spec.py +57 -0
- examples/arches/compute_in_memory/components/c2c_multiplier.py +181 -0
- examples/arches/compute_in_memory/components/dac_c2c_r2r.py +605 -0
- examples/arches/compute_in_memory/components/misc.py +195 -0
- examples/arches/compute_in_memory/components/util/bit_functions.py +51 -0
- examples/arches/compute_in_memory/components/zero_comparator.py +92 -0
- examples/arches/compute_in_memory/isaac.yaml +233 -0
- examples/arches/compute_in_memory/memory_cells/ecram_demo.yaml +63 -0
- examples/arches/compute_in_memory/memory_cells/rram_example.yaml +63 -0
- examples/arches/compute_in_memory/memory_cells/rram_isaac_isca_2016.yaml +64 -0
- examples/arches/compute_in_memory/memory_cells/rram_neurosim_default.yaml +63 -0
- examples/arches/compute_in_memory/memory_cells/rram_raella_isca_2023.yaml +70 -0
- examples/arches/compute_in_memory/memory_cells/rram_wan_nature_2022.yaml +63 -0
- examples/arches/compute_in_memory/memory_cells/sram_colonnade_jssc_2021.yaml +63 -0
- examples/arches/compute_in_memory/memory_cells/sram_example.yaml +63 -0
- examples/arches/compute_in_memory/memory_cells/sram_jia_jssc_2020.yaml +63 -0
- examples/arches/compute_in_memory/memory_cells/sram_sinangil_jssc_2021.yaml +63 -0
- examples/arches/compute_in_memory/memory_cells/sram_wang_vlsi_2022.yaml +63 -0
- examples/arches/compute_in_memory/wang_vlsi_2022.yaml +289 -0
- examples/arches/eyeriss.yaml +68 -0
- examples/arches/fanout_variations/at_glb.yaml +31 -0
- examples/arches/fanout_variations/at_glb_with_fanout_node.yaml +34 -0
- examples/arches/fanout_variations/at_mac.yaml +31 -0
- examples/arches/fanout_variations/at_mac_with_constraints.yaml +38 -0
- examples/arches/fanout_variations/at_mac_with_fanout_node.yaml +34 -0
- examples/arches/nvdla.yaml +47 -0
- examples/arches/simple.yaml +28 -0
- examples/arches/tpu_v4i.yaml +67 -0
- examples/mappings/unfused_matmuls_to_simple.yaml +33 -0
- examples/misc/component_annotated.yaml +33 -0
- examples/workloads/gpt3_6.7B.yaml +124 -0
- examples/workloads/matmuls.yaml +20 -0
- examples/workloads/mobilenet_28.yaml +81 -0
- examples/workloads/mobilenet_various_separate.yaml +106 -0
- examples/workloads/three_matmuls_annotated.yaml +59 -0
- notebooks/.ipynb_checkpoints/fastfusion_arch_study_michael-checkpoint.ipynb +359 -0
- notebooks/compute_in_memory/_scripts.py +339 -0
- notebooks/compute_in_memory/isaac.guide.ipynb +270 -0
- notebooks/compute_in_memory/wang_vlsi_2022.ipynb +602 -0
- notebooks/paths.py +4 -0
- notebooks/tutorials/.ipynb_checkpoints/1_FFM-checkpoint.ipynb +3110 -0
- notebooks/tutorials/FFM.ipynb +3498 -0
- notebooks/tutorials/_include.py +48 -0
- notebooks/tutorials/component_energy_area.ipynb +363 -0
- tests/Q_mapping.yaml +38 -0
- tests/__init__.py +0 -0
- tests/conv.mapping.yaml +27 -0
- tests/conv.workload.yaml +13 -0
- tests/conv_sym.mapping.yaml +43 -0
- tests/copy.mapping.yaml +35 -0
- tests/copy.workload.yaml +15 -0
- tests/distribuffers/__init__.py +0 -0
- tests/distribuffers/multicast/test_cases.yaml +482 -0
- tests/distribuffers/spec/binding/valid_bindings.yaml +97 -0
- tests/distribuffers/spec/distributed.yaml +100 -0
- tests/distribuffers/spec/logical_arch.yaml +32 -0
- tests/distribuffers/spec/physical_arch.yaml +69 -0
- tests/distribuffers/test_binding.py +48 -0
- tests/frontend/__init__.py +0 -0
- tests/frontend/test_mapping_viz.py +52 -0
- tests/mapper/__init__.py +0 -0
- tests/mapper/configs/conv1d/conv1d.mapping.yaml +31 -0
- tests/mapper/configs/conv1d/conv1d.workload.yaml +11 -0
- tests/mapper/configs/two_conv1d/two_conv1d.expected.yaml +38 -0
- tests/mapper/configs/two_conv1d/two_conv1d.mapping.yaml +54 -0
- tests/mapper/configs/two_conv1d/two_conv1d.workload.yaml +19 -0
- tests/mapper/test_mapping_to_isl.py +90 -0
- tests/mapper/test_spatial_reuse_analysis.py +67 -0
- tests/mapper/test_temporal_reuse_analysis.py +56 -0
- tests/mapper/util.py +58 -0
- tests/matmul.mapping.yaml +29 -0
- tests/matmul.workload.yaml +12 -0
- tests/matmul_spatial.mapping.yaml +44 -0
- tests/mha.renames.yaml +65 -0
- tests/mha.workload.yaml +67 -0
- tests/mha.yaml +59 -0
- tests/mha_full.workload.yaml +67 -0
- tests/mobilenet.workload.yaml +35 -0
- tests/mobilenet_long.workload.yaml +64 -0
- tests/pmappingcache.py +24 -0
- tests/processing_stage.arch.yaml +40 -0
- tests/snowcat.arch.yaml +36 -0
- tests/test_ffm_join_pmappings.py +106 -0
- tests/test_ffm_make_pmappings.py +82 -0
- tests/test_ffm_make_tile_shapes.py +49 -0
- tests/test_mapper.py +100 -0
- tests/test_model.py +37 -0
- tests/test_plotting.py +72 -0
- tests/test_processing_stage.py +46 -0
- tests/test_symbolic_model.py +248 -0
- tests/test_workload.py +141 -0
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
from .aggregators import CAPACITY_AGGREGATORS
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
def get_value_from_singular_qpolynomial(qp):
    """Evaluate a quasi-polynomial whose domain contains a single point.

    Samples one point from the domain of ``qp`` and evaluates ``qp`` there.
    Only meaningful when the polynomial is constant over its (singleton)
    domain, as the name implies.
    """
    sample = qp.domain().sample_point()
    return qp.eval(sample)
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def compute_capacity_usage(mapping, occupancy, workload):
    """Compute the capacity used in each buffer of a (possibly branched) mapping.

    Walks the mapping nodes; for every ``storage`` node, sums (over the
    tensors it holds) the maximum occupancy across all einsums computed under
    the mapping. Branch nodes (``sequential``/``parallel``/``pipeline``)
    recurse into their branches and combine the children's capacity dicts via
    the matching entry of ``CAPACITY_AGGREGATORS``.

    Args:
        mapping: iterable of mapping-node dicts (keys: "type", "target",
            "dspace", "branches", ...).
        occupancy: maps (buffer, tensor_id, einsum_id) to a pair whose second
            element is a singular quasi-polynomial of the occupancy.
        workload: provides ``data_space_name_to_id()`` / ``einsum_name_to_id()``.

    Returns:
        dict mapping buffer name -> total capacity used.
    """
    caps = {}
    tensor_name_to_id = workload.data_space_name_to_id()
    einsum_name_to_id = workload.einsum_name_to_id()

    # Hoisted out of the node loop: the einsum list depends only on `mapping`,
    # so recomputing it for every node was redundant work. `or []` preserves
    # iterability if get_einsums finds no compute node.
    einsums = get_einsums(mapping) or []

    for node in mapping:
        if node["type"] == "storage":
            buf = node["target"]
            if buf not in caps:
                caps[buf] = 0

            for tensor in node["dspace"]:
                tensor_id = tensor_name_to_id[tensor]
                max_cap = 0
                for einsum in einsums:
                    einsum_id = einsum_name_to_id[einsum]
                    key = (buf, tensor_id, einsum_id)
                    if key in occupancy:
                        max_cap = max(
                            max_cap,
                            get_value_from_singular_qpolynomial(occupancy[key][1]),
                        )
                # Each tensor contributes its worst-case (max over einsums)
                # occupancy to the buffer's total.
                caps[buf] += max_cap

        elif node["type"] in ["sequential", "parallel", "pipeline"]:
            aggregate_capacity = CAPACITY_AGGREGATORS[node["type"]]
            child_caps = [
                compute_capacity_usage(b, occupancy, workload) for b in node["branches"]
            ]
            # Aggregator merges the children's capacity dicts into `caps`
            # in place (e.g. sum for parallel, max for sequential).
            aggregate_capacity(child_caps, caps)
    return caps
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
def get_einsums(mapping):
    """Collect the names of all einsums computed anywhere under ``mapping``.

    Recurses through the first branch node encountered; a ``compute`` node
    terminates the walk with its single einsum.

    Args:
        mapping: iterable of mapping-node dicts.

    Returns:
        list of einsum names. Returns ``[]`` when the mapping contains no
        compute or branch node (previously this fell through and returned
        ``None``, which crashed callers that iterate the result).
    """
    for node in mapping:
        if node["type"] in ["sequential", "parallel", "pipeline"]:
            # Concatenate the einsum lists of every branch, preserving order.
            return sum((get_einsums(b) for b in node["branches"]), start=[])
        elif node["type"] == "compute":
            return [node["einsum"]]
    return []
|
@@ -0,0 +1,150 @@
|
|
|
1
|
+
from collections import defaultdict
|
|
2
|
+
from collections.abc import Mapping as MappingABC
|
|
3
|
+
from dataclasses import dataclass
|
|
4
|
+
import logging
|
|
5
|
+
from numbers import Number
|
|
6
|
+
from numbers import Real
|
|
7
|
+
|
|
8
|
+
from accelforge.frontend import arch
|
|
9
|
+
from accelforge.frontend.mapping.mapping import MappingNode
|
|
10
|
+
from accelforge.frontend.spec import Spec
|
|
11
|
+
from accelforge.model._looptree.reuse.symbolic import SymbolicAnalysisOutput
|
|
12
|
+
from accelforge.util._base_analysis_types import (
|
|
13
|
+
ActionCount,
|
|
14
|
+
ActionKey,
|
|
15
|
+
VerboseActionKey,
|
|
16
|
+
)
|
|
17
|
+
from accelforge.frontend.workload import Workload
|
|
18
|
+
from accelforge.frontend.mapping import Mapping
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def gather_actions(
    looptree_results: SymbolicAnalysisOutput,
    bindings: dict[str, str],
    verbose: bool = False,
    use_name: bool = False,
):
    """Aggregate per-component action counts from reuse-analysis results.

    Buffet (storage) stats contribute "read" and "write" actions; compute
    stats contribute "compute" actions. Buffets living at a compute level are
    skipped, since their traffic is accounted for by the compute stats.

    Args:
        looptree_results: symbolic reuse-analysis output with ``buffet_stats``
            and ``compute_stats``.
        bindings: maps logical level names to bound component names; used to
            key actions when ``use_name`` is False.
        verbose: when True, keys also carry tensor/einsum (VerboseActionKey).
        use_name: when True, key actions by the raw level name instead of the
            binding.

    Returns:
        dict mapping action key -> ActionCount (totals and max-per-unit).
    """
    actions: dict[tuple[str, str], ActionCount] = {}
    compute_levels = set(c.level for c in looptree_results.compute_stats)

    buffet_keyer = _get_buffet_keyer(verbose, use_name, bindings)
    compute_keyer = _get_compute_keyer(verbose, use_name, bindings)

    for buffet, accesses in looptree_results.buffet_stats.items():
        # Compute-level buffets are handled by the compute loop below.
        if buffet.level in compute_levels:
            continue

        # (A dead per-level rebinding was removed here: the keyers already
        # translate buffet.level through `bindings` when use_name is False,
        # and the locally computed value was never used.)

        key = buffet_keyer(buffet, "read")
        if key not in actions:
            actions[key] = ActionCount.default()
        actions[key].total += accesses.net_total_read_actions()
        actions[key].max_per_unit += accesses.net_max_per_unit_read_actions()

        key = buffet_keyer(buffet, "write")
        if key not in actions:
            actions[key] = ActionCount.default()
        actions[key].total += accesses.net_total_write_actions()
        actions[key].max_per_unit += accesses.net_max_per_unit_write_actions()

    for compute, ops in looptree_results.compute_stats.items():
        key = compute_keyer(compute, "compute")
        if key not in actions:
            actions[key] = ActionCount.default()
        actions[key].total += ops.total_ops
        actions[key].max_per_unit += ops.max_per_unit_ops

    return actions
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
def _get_buffet_keyer(verbose, use_name, bindings):
|
|
67
|
+
if not verbose:
|
|
68
|
+
|
|
69
|
+
def get_buffet_key(buffet, action_name) -> ActionKey:
|
|
70
|
+
level = buffet.level
|
|
71
|
+
if use_name:
|
|
72
|
+
level = level
|
|
73
|
+
else:
|
|
74
|
+
level = bindings[level]
|
|
75
|
+
return ActionKey(level, action_name)
|
|
76
|
+
|
|
77
|
+
else:
|
|
78
|
+
|
|
79
|
+
def get_buffet_key(buffet, action_name) -> VerboseActionKey:
|
|
80
|
+
level = buffet.level
|
|
81
|
+
if use_name:
|
|
82
|
+
level = level
|
|
83
|
+
else:
|
|
84
|
+
level = bindings[level]
|
|
85
|
+
return VerboseActionKey(level, action_name, buffet.tensor, buffet.einsum)
|
|
86
|
+
|
|
87
|
+
return get_buffet_key
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
def _get_compute_keyer(verbose, use_name, bindings):
|
|
91
|
+
if not verbose:
|
|
92
|
+
|
|
93
|
+
def compute_keyer(compute, action_name):
|
|
94
|
+
level = compute.level
|
|
95
|
+
if use_name:
|
|
96
|
+
level = level
|
|
97
|
+
else:
|
|
98
|
+
level = bindings[level]
|
|
99
|
+
return ActionKey(level, action_name)
|
|
100
|
+
|
|
101
|
+
else:
|
|
102
|
+
|
|
103
|
+
def compute_keyer(compute, action_name):
|
|
104
|
+
level = compute.level
|
|
105
|
+
if use_name:
|
|
106
|
+
level = level
|
|
107
|
+
else:
|
|
108
|
+
level = bindings[level]
|
|
109
|
+
return VerboseActionKey(level, action_name, None, compute.einsum)
|
|
110
|
+
|
|
111
|
+
return compute_keyer
|
|
112
|
+
|
|
113
|
+
|
|
114
|
+
def compute_energy_from_actions(
    spec: Spec,
    action_counts: MappingABC[ActionKey, Real],
    overall_latency: float,
    component_to_non_power_gated_porp: dict[str, float] | None = None,
) -> dict[ActionKey | VerboseActionKey, Number]:
    """Compute per-(component, action) dynamic energy plus per-component leakage.

    Args:
        spec: full spec; ``spec.arch`` is searched for components by name.
        action_counts: maps an action key to its counts; entries with
            ``total == 0`` are skipped.
        overall_latency: total runtime, multiplied into leakage energy.
        component_to_non_power_gated_porp: per-component proportion that is
            NOT power gated (scales leakage). Components missing from the
            dict default to 1 (no power gating). If None, a warning is logged
            and no power gating is applied.

    Returns:
        dict mapping action key -> energy, with an additional
        ``ActionKey(name, "leak")`` entry for every Component in the arch.

    Raises:
        KeyError: if an action in ``action_counts`` is not declared by the
            corresponding component.
    """
    if component_to_non_power_gated_porp is None:
        logging.warning(
            "No component_to_non_power_gated_porp provided, will not account for power gating."
        )
        component_to_non_power_gated_porp = {}

    energy_result = {}
    components = {}  # cache of spec.arch.find(...) lookups, keyed by level name
    for key, counts in action_counts.items():
        if counts.total == 0:
            continue
        if key.level not in components:
            components[key.level] = spec.arch.find(key.level)
        component_obj = components[key.level]
        try:
            energy_per_ac = component_obj.actions[key.action].energy
        except KeyError:
            # Bug fix: the message previously referenced `key.component`,
            # an attribute action keys do not have (they carry `level`, as
            # used above), so reporting this error raised AttributeError.
            raise KeyError(
                f"Action {key.action} not found in component {key.level}. Action occurred "
                f"{counts.total} times."
            ) from None
        energy_result[key] = counts.total * energy_per_ac

    # Leakage: every component leaks for the whole runtime, scaled by its
    # non-power-gated proportion (1 when unspecified).
    for component_obj in spec.arch.get_nodes_of_type(arch.Component):
        energy_result[ActionKey(component_obj.name, "leak")] = (
            component_obj.total_leak_power
            * overall_latency
            * component_to_non_power_gated_porp.get(component_obj.name, 1)
        )

    return energy_result
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
# from bindings.looptree import LooptreeWorkload, LooptreeWorkloadDependencyAnalyzer
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
class EquivalentGroups:
    """Partitions workload ranks (dimensions) into equivalence classes.

    Attributes:
        group_id_to_ranks: maps a group id to the frozenset of ranks that
            are mutually equivalent.
        rank_to_group_id: maps each rank to the id of the group it belongs to.
    """

    def __init__(self):
        self.group_id_to_ranks = {}
        self.rank_to_group_id = {}

    @staticmethod
    def from_workload(
        workload: "LooptreeWorkload", analyzer: "LooptreeWorkloadDependencyAnalyzer"
    ):
        """Build equivalence groups from every einsum's output-space dimensions.

        For each (einsum, rank) pair, the analyzer's equivalence set is
        frozen; each distinct set becomes one group with a sequential id.
        """
        groups = EquivalentGroups()
        already_grouped = set()

        for einsum_id in workload.einsum_id_to_name():
            for rank_id in workload.einsum_ospace_dimensions(einsum_id):
                equiv = frozenset(analyzer.equivalent_dimensions(einsum_id, rank_id))
                if equiv in already_grouped:
                    continue
                already_grouped.add(equiv)
                # Group ids are assigned in discovery order.
                new_id = len(groups.group_id_to_ranks)
                groups.group_id_to_ranks[new_id] = equiv
                for rank in equiv:
                    groups.rank_to_group_id[rank] = new_id

        return groups
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
from .latency import get_latency
|
|
@@ -0,0 +1,98 @@
|
|
|
1
|
+
from typing import overload
|
|
2
|
+
from sympy import Piecewise
|
|
3
|
+
|
|
4
|
+
# from accelforge.model._looptree._isl.singular import get_value_from_singular_qpolynomial
|
|
5
|
+
from accelforge.frontend.arch import Compute
|
|
6
|
+
from accelforge.model._looptree.latency.processors import LATENCY_PROCESSORS
|
|
7
|
+
from accelforge.model._looptree.reuse.isl import IslReuseAnalysisOutput
|
|
8
|
+
from accelforge.model._looptree.reuse import SymbolicAnalysisOutput
|
|
9
|
+
|
|
10
|
+
from accelforge.util._sympy.broadcast_max import Max
|
|
11
|
+
|
|
12
|
+
# from bindings.looptree import SpatialTag
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def get_latency(looptree_results, mapping, workload, flattened_arch):
    """Return ``(overall, compute, per-memory)`` latency for a mapping.

    Overall latency is the maximum of the compute latency and every memory
    component's latency (components are assumed to overlap).

    NOTE(review): ``memory_latency`` is not among this module's visible
    imports — confirm it is defined or imported elsewhere in this file.
    """
    comp_latency = calculate_compute_latency(looptree_results, mapping, workload)
    mem_latency = memory_latency(looptree_results, flattened_arch, mapping, workload)

    overall_latency = Max(comp_latency, *mem_latency.values())
    return overall_latency, comp_latency, mem_latency
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
@overload
def calculate_compute_latency(
    reuse_analysis_results: IslReuseAnalysisOutput, mapping, workload
): ...


@overload
def calculate_compute_latency(
    reuse_analysis_results: SymbolicAnalysisOutput, mapping, workload
): ...


def calculate_compute_latency(reuse_analysis_results, mapping, workload):
    """Dispatch compute-latency calculation on the reuse-analysis result type.

    ISL results go through ``compute_isl_latency`` (currently unimplemented);
    symbolic results go through ``compute_summarized_latency``.

    Raises:
        TypeError: if ``reuse_analysis_results`` is neither supported type.
            (Previously this fell through and silently returned None.)
    """
    if isinstance(reuse_analysis_results, IslReuseAnalysisOutput):
        return compute_isl_latency(
            reuse_analysis_results.temporal_steps, mapping, workload
        )
    elif isinstance(reuse_analysis_results, SymbolicAnalysisOutput):
        return compute_summarized_latency(
            reuse_analysis_results.compute_stats, mapping, workload
        )
    raise TypeError(
        f"Unsupported reuse analysis output type: {type(reuse_analysis_results)!r}"
    )
|
46
|
+
|
|
47
|
+
|
|
48
|
+
def compute_summarized_latency(compute_stats, mapping, workload):
    """Return the largest ``max_latency`` across all compute-unit stats.

    TODO: this is only for single-Einsum!!!
    """
    longest = 0
    for unit_stats in compute_stats.values():
        # The first latency replaces the initial 0 directly so a symbolic
        # Max expression is not cluttered with a constant-zero argument.
        if longest == 0:
            longest = unit_stats.max_latency
        else:
            longest = Max(longest, unit_stats.max_latency)
    return longest
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
def compute_isl_latency(temporal_steps, mapping, workload):
    """Compute total latency from ISL temporal steps. Not yet implemented.

    The code after the ``raise`` is unreachable; it is retained as a sketch
    of the intended implementation (reduce the mapping tree's latency
    quasi-polynomial to a single value).
    """
    raise NotImplementedError()
    return get_value_from_singular_qpolynomial(
        _compute_latency(mapping.nodes, 0, temporal_steps, workload)[1]
    ).to_python()
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
def _compute_latency(mapping, top_idx: int, temporal_steps, workload):
    """Recursive helper for ``compute_isl_latency``. Not yet implemented.

    Everything below the ``raise`` is unreachable; it sketches the intended
    recursion: branch nodes combine children's latencies via
    ``LATENCY_PROCESSORS``; compute nodes look up their einsum's temporal
    steps. Returns a ``(dim_tags, latency)`` pair.
    """
    raise NotImplementedError()
    einsum_name_to_id = workload.einsum_name_to_id()

    next_top_idx = top_idx
    for node in mapping:
        next_top_idx += 1

        if node["type"] in LATENCY_PROCESSORS.keys():
            children_latencies = [
                _compute_latency(branch, next_top_idx, temporal_steps, workload)
                for branch in node["branches"]
            ]

            return LATENCY_PROCESSORS[node["type"]](top_idx, children_latencies)
        elif node["type"] == "compute":
            einsum = node["einsum"]
            # Incomplete compute nodes contribute no latency.
            if "incomplete" in node and node["incomplete"]:
                return ([], 0)
            einsum_id = einsum_name_to_id[einsum]
            return temporal_steps[einsum_id]
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
def ops_to_latency(dims, map):
    """Convert an operation map to a latency count. Not yet implemented.

    The code after the ``raise`` is unreachable dead code; it references
    ``SpatialTag``, which comes from a commented-out import at the top of
    this module, and would fail if ever enabled as-is.
    """
    raise NotImplementedError()
    mask = [False] * len(dims)
    new_dims = []
    for i, d in enumerate(dims):
        if d == SpatialTag:
            mask[i] = True
        else:
            new_dims.append(d)
    return map.domain().identity().card()
|
|
@@ -0,0 +1,120 @@
|
|
|
1
|
+
from collections import defaultdict
|
|
2
|
+
|
|
3
|
+
from accelforge.frontend import arch
|
|
4
|
+
from accelforge.frontend.arch import Leaf, Memory, TensorHolder, Component
|
|
5
|
+
from accelforge.frontend.mapping import Compute, Mapping
|
|
6
|
+
from accelforge.frontend.spec import Spec
|
|
7
|
+
|
|
8
|
+
from accelforge.model._looptree.accesses import isl_buffer_accesses_from_buffet_actions
|
|
9
|
+
from accelforge.model._looptree.mapping_utilities import get_leaves
|
|
10
|
+
from accelforge.model._looptree.reuse.isl import IslReuseAnalysisOutput
|
|
11
|
+
from accelforge.model._looptree.reuse import SymbolicAnalysisOutput
|
|
12
|
+
from accelforge.model._looptree.types import Buffet
|
|
13
|
+
|
|
14
|
+
from accelforge.model._looptree.reuse.symbolic import BuffetStats
|
|
15
|
+
from accelforge.util._parse_expressions import MATH_FUNCS, parse_expression
|
|
16
|
+
from accelforge.util._sympy.broadcast_max import Max, Min
|
|
17
|
+
import sympy as sp
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def isl_to_summarized(
    looptree_results: IslReuseAnalysisOutput, mapping, workload
) -> SymbolicAnalysisOutput:
    """Convert ISL reuse-analysis results into the summarized symbolic form.

    Extracts per-(component, tensor, einsum) access stats and repackages the
    per-unit read/write maxima as ``BuffetStats`` keyed by ``Buffet``.
    """
    per_buffet_accesses = isl_buffer_accesses_from_buffet_actions(
        looptree_results, mapping, workload, is_path=False
    )

    buffet_stats = {}
    for (component, tensor, einsum), accesses in per_buffet_accesses.items():
        buffet = Buffet(level=component, tensor=tensor, einsum=einsum)
        buffet_stats[buffet] = BuffetStats(
            max_per_unit_read_actions=accesses.max_per_unit_reads,
            max_per_unit_write_actions=accesses.max_per_unit_writes,
        )

    return SymbolicAnalysisOutput(buffet_stats=buffet_stats)
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
def component_latency(
    looptree_results: SymbolicAnalysisOutput,
    flattened_arch: list[Leaf],
    mapping: Mapping,
    spec: Spec,
):
    """Compute each component's total latency from per-action counts.

    Accumulates per-component action counts from buffet stats (reads/writes)
    and compute stats, converts them to per-action latencies via each
    component's declared action latencies, then evaluates each component's
    ``total_latency`` expression against a symbol table of variables, action
    counts, and action latencies.

    Returns:
        dict mapping component name -> evaluated total-latency expression
        (only components that declare ``total_latency``).

    Raises:
        ValueError: if the last arch node is not a Compute, or a buffet's
            level is missing from the arch.
        NotImplementedError: for components that are neither TensorHolder
            nor Compute.
    """
    component_to_actions: dict[str, dict[str, float]] = defaultdict(
        lambda: defaultdict(lambda: 0)
    )
    name2component: dict[str, Component] = {node.name: node for node in flattened_arch}

    # assumes the flattened arch lists the compute level last — TODO confirm
    compute_obj = flattened_arch[-1]
    if not isinstance(compute_obj, arch.Compute):
        raise ValueError("Last node in flattened_arch must be a Compute")

    for buffet, buffet_stats in looptree_results.buffet_stats.items():
        component = buffet.level
        actions = component_to_actions[component]
        if component not in name2component:
            raise ValueError(f"Component {component} found in mapping but not arch")

        # Ensure every declared action has an entry (count 0 if never used).
        for action in name2component[component].actions:
            actions[f"{action.name}_actions"] += 0

        if isinstance(name2component[component], TensorHolder):
            # Per-unit reads, net of skipped first reads.
            actions["read_actions"] += (
                buffet_stats.max_per_unit_read_actions
                - buffet_stats.min_per_unit_skipped_first_read_actions
            )
            # ProcessingStage holders take no write actions — presumably
            # pass-through stages; verify against arch definitions.
            if not isinstance(name2component[component], arch.ProcessingStage):
                actions["write_actions"] += (
                    buffet_stats.max_per_unit_write_actions
                    - buffet_stats.min_per_unit_skipped_first_write_actions
                )
        elif isinstance(name2component[component], arch.Compute):
            pass
        else:
            raise NotImplementedError(
                f"Component {component} is not a TensorHolder or Compute"
            )

    # Compute actions are counted as the longest per-unit compute latency.
    longest_compute_latency = Max(
        0, *[s.max_latency for s in looptree_results.compute_stats.values()]
    )
    component_to_actions[compute_obj.name]["compute_actions"] = longest_compute_latency

    # TODO: Unhardcode "compute" name"
    # Convert counts to latencies using each action's declared unit latency.
    component_to_action_latency = defaultdict(dict)
    for component, actions in component_to_actions.items():
        component_obj = name2component[component]
        for action, count in actions.items():
            # "<name>_actions" -> "<name>"
            action_name = action.rsplit("_", 1)[0]
            latency = component_obj.actions[action_name].latency
            component_to_action_latency[component][f"{action_name}_latency"] = (
                latency * count
            )

    component_latency = {}

    # Base symbols shared by every component's total_latency expression.
    symbol_table_base = {
        **dict(spec.variables),
        "variables": spec.variables,
        "max": Max,
        "min": Min,
        "sum": sp.Add,
    }

    for component, actions in component_to_actions.items():
        component_obj = name2component[component]
        # Later entries shadow earlier ones: component attributes, then raw
        # action counts, then per-action latencies.
        symbol_table = {
            "action2latency": component_to_action_latency[component],
            **symbol_table_base,
            **dict(name2component[component]),
            **actions,
            **component_to_action_latency[component],
        }
        if name2component[component].total_latency is not None:
            component_latency[component] = parse_expression(
                name2component[component].total_latency,
                symbol_table,
                attr_name="latency",
                location=component,
            )

    return component_latency
|
|
@@ -0,0 +1,92 @@
|
|
|
1
|
+
import pprint
|
|
2
|
+
|
|
3
|
+
import islpy as isl
|
|
4
|
+
|
|
5
|
+
# from bindings.looptree import PipelineSpatialTag
|
|
6
|
+
# from pytimeloop._isl.sum import sum_until_idx, make_reduction_map
|
|
7
|
+
# from pytimeloop._isl.qpolynomial import from_pw_qpolynomial_fold
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def process_sequential_latency(top_idx: int, latencies):
    """Sum the latencies of branches that execute sequentially.

    :param top_idx: Number of leading loop dimensions shared by all
        branches; summation collapses everything below this index.
    :param latencies: Sequence of ``(dim_tags, latency)`` pairs, one per
        branch. Assumes all entries share the same first ``top_idx`` dim
        tags -- TODO confirm with callers.

    :returns: ``(common_dim_tags, total_sequential_latency)`` where the
        latency is the sum over all branches.
    """
    # NOTE(review): sum_until_idx's import is commented out at module top;
    # confirm it is provided elsewhere before this runs.
    common_dim_tags = latencies[0][0][:top_idx]
    try:
        total_sequential_latency = sum(
            sum_until_idx(top_idx, latency) for dim_tags, latency in latencies
        )
    # Narrowed from a bare `except:` so SystemExit/KeyboardInterrupt pass
    # through without the diagnostic dump; ordinary errors still re-raise.
    except Exception:
        # Dump the offending input before propagating so failures inside the
        # (opaque) ISL summation are debuggable.
        print("Bad input:")
        pprint.pp(latencies)
        raise
    return common_dim_tags, total_sequential_latency
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def process_pipeline_latency(top_idx: int, latencies):
    """
    Compute the aggregate latency of pipelined branches as the sequential
    sum minus the latency hidden by pipeline overlap.

    :param top_idx: Number of leading dim tags shared by all branches.
    :param latencies: Sequence of ``(dim_tags, latency)`` pairs, one per
        pipeline stage; entries presumably share their leading dim tags --
        TODO confirm.

    :returns: ``(common_dim_tags, latency)``.
    """
    # Upper bound: total latency if the stages ran back-to-back.
    sequential_latency = process_sequential_latency(top_idx, latencies)[1]

    all_dim_tags = latencies[0][0]
    dim_tags = all_dim_tags[:]
    # Locate the first pipeline-spatial dimension; everything at or above it
    # is kept, dims past it are collapsed by the summation below.
    for pipeline_idx in range(len(dim_tags)):
        if isinstance(dim_tags[pipeline_idx], PipelineSpatialTag):
            break

    try:
        dim_tags = dim_tags[: pipeline_idx + 1]
        # Latency per (outer dims, pipeline step), summed across all stages.
        summed_latency = sum(
            sum_until_idx(pipeline_idx + 1, latency) for tags, latency in latencies
        )
    except:
        # Dump the input before re-raising so ISL failures are debuggable.
        print("Bad input:")
        pprint.pp(latencies)
        raise

    space = summed_latency.get_domain_space()
    # Map each pipeline step to the steps overlapping it in its window of
    # len(latencies) stages (see make_hidden_latency_map).
    hidden_latency_map = make_hidden_latency_map(dim_tags, space, len(latencies))
    hidden_latencies = hidden_latency_map.apply_pw_qpolynomial(summed_latency)

    # Fold the overlapping steps with `min`, then reduce over the innermost
    # dimension. NOTE(review): taking the minimum presumably gives a
    # conservative bound on the hidden latency -- confirm intent.
    reduction_map = make_reduction_map(space, len(dim_tags) - 1, 1)
    reduction_map = reduction_map.intersect_range(summed_latency.domain()).coalesce()
    hidden_latencies, is_tight = reduction_map.apply_pw_qpolynomial_fold(
        isl.PwQPolynomialFold.from_pw_qpolynomial(isl.fold.min, hidden_latencies)
    )
    hidden_latencies = from_pw_qpolynomial_fold(hidden_latencies)

    # Remove last one (the lexicographically final domain point; presumably
    # the last step has no successor to hide latency behind -- confirm).
    domain = hidden_latencies.domain()
    hidden_latencies = hidden_latencies.subtract_domain(domain.lexmax())

    hidden_latency = sum_until_idx(top_idx, hidden_latencies)

    return all_dim_tags[:top_idx], sequential_latency - hidden_latency
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
# Dispatch table: schedule-node kind -> routine that aggregates the latencies
# of its child branches. Both routines take (top_idx, latencies) and return
# (common_dim_tags, latency).
LATENCY_PROCESSORS = {
    "sequential": process_sequential_latency,
    "pipeline": process_pipeline_latency,
}
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
def make_hidden_latency_map(dim_tags, space, n_stages):
    """
    Build an ISL map relating each pipeline step to the steps that overlap it.

    space: [..., t, ps]
    returns: [..., t, ps] -> [..., t', ps'] : P*t+ps+1 <= P*t'+ps' < P*t+ps+P

    The last two set dimensions are the temporal step ``t`` and the pipeline
    stage ``ps``; ``P`` is ``n_stages``. Steps are linearized as ``P*t + ps``
    and the range covers the next ``P - 1`` linearized steps after the domain
    point. All outer dimensions are constrained equal between domain and range.
    """
    assert len(dim_tags) >= 2

    t_idx = len(dim_tags) - 2
    ps_idx = len(dim_tags) - 1

    # Affine expressions over the two innermost set dims; the linearized
    # step index is P*t + ps.
    tprime = isl.Aff.var_on_domain(space, isl.dim_type.set, t_idx)
    ps_prime = isl.Aff.var_on_domain(space, isl.dim_type.set, ps_idx)
    inner = n_stages * tprime + ps_prime

    lower = n_stages * tprime + ps_prime + 1
    upper = n_stages * tprime + ps_prime + n_stages

    # lower(x) <= inner(y) and upper(x) > inner(y) yields the half-open
    # window of steps following x.
    hidden_latency_map = lower.le_map(inner).intersect(upper.gt_map(inner))

    # Make other dimensions equal between domain and range.
    for i in range(t_idx):
        var = isl.Aff.var_on_domain(space, isl.dim_type.set, i)
        hidden_latency_map = hidden_latency_map.intersect(var.eq_map(var))

    return hidden_latency_map
|
|
@@ -0,0 +1,71 @@
|
|
|
1
|
+
from typing import Generator, List, Tuple
|
|
2
|
+
|
|
3
|
+
from accelforge.frontend.mapping import (
|
|
4
|
+
Compute,
|
|
5
|
+
Mapping,
|
|
6
|
+
MappingNode,
|
|
7
|
+
Pipeline,
|
|
8
|
+
Sequential,
|
|
9
|
+
)
|
|
10
|
+
from accelforge.frontend.workload import Workload
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def get_paths(root: Mapping) -> Generator[Tuple[MappingNode, Compute], None, None]:
|
|
14
|
+
"""
|
|
15
|
+
Given a MappingNode, get the paths to all all leaves in post-order.
|
|
16
|
+
|
|
17
|
+
:param root: The root of the child exploration.
|
|
18
|
+
|
|
19
|
+
:type root: MappingNode
|
|
20
|
+
|
|
21
|
+
:returns: A generator of all the MappingNodes to a Compute leaf.
|
|
22
|
+
:rtype: Generator[List[MappingNode]]
|
|
23
|
+
"""
|
|
24
|
+
cur_path: List[MappingNode] = []
|
|
25
|
+
for node in root.nodes:
|
|
26
|
+
cur_path.append(node)
|
|
27
|
+
match node:
|
|
28
|
+
# Pipelines or sequentials should have their paths expanded.
|
|
29
|
+
# Mappings naturally get expanded.
|
|
30
|
+
case Mapping() | Pipeline() | Sequential():
|
|
31
|
+
for child in node.nodes:
|
|
32
|
+
for subpath in get_paths(child):
|
|
33
|
+
yield tuple(cur_path) + subpath
|
|
34
|
+
# Computes are leaves so should get a yield here.
|
|
35
|
+
case Compute():
|
|
36
|
+
yield tuple(cur_path)
|
|
37
|
+
# Not implemented so continue.
|
|
38
|
+
case _:
|
|
39
|
+
# TODO: Check this is correct
|
|
40
|
+
continue
|
|
41
|
+
raise NotImplementedError(
|
|
42
|
+
f"{type(node)} does not have type elucidation.\n"
|
|
43
|
+
f"---\n"
|
|
44
|
+
f"node={node}"
|
|
45
|
+
)
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
def get_leaves(mapping: Mapping, is_path):
|
|
49
|
+
if is_path:
|
|
50
|
+
yield mapping[-1]
|
|
51
|
+
return
|
|
52
|
+
for node in mapping:
|
|
53
|
+
if isinstance(node, Pipeline) or isinstance(node, Sequential):
|
|
54
|
+
for child in node.children:
|
|
55
|
+
yield from get_leaves(child, is_path)
|
|
56
|
+
elif isinstance(node, Compute):
|
|
57
|
+
yield node
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
def get_intermediate_tensors(workload: Workload):
|
|
61
|
+
result = set()
|
|
62
|
+
for einsum in workload.einsum_id_to_name():
|
|
63
|
+
written_tensors = workload.einsums[einsum].output_tensor_names
|
|
64
|
+
for tensor in written_tensors:
|
|
65
|
+
reader_einsums = workload.reader_einsums(tensor)
|
|
66
|
+
for reader in reader_einsums:
|
|
67
|
+
if reader in workload.einsum_id_to_name():
|
|
68
|
+
result.add(tensor)
|
|
69
|
+
break
|
|
70
|
+
|
|
71
|
+
return result
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
from .des import IslReuseAnalysisOutput
|
|
@@ -0,0 +1,59 @@
|
|
|
1
|
+
"""
|
|
2
|
+
TODO: Is this file still necessary? It is referenced elsewhere but is no longer
|
|
3
|
+
the format we are looking for.
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
from dataclasses import dataclass, field
|
|
7
|
+
|
|
8
|
+
import islpy as isl
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
@dataclass
class IslReuseAnalysisOutput:
    """Container for reuse-analysis results.

    Each attribute is a dict; ``deserialize_looptree_output`` populates
    ``ops``, ``fills``, ``occupancy``, ``reads_to_peer``,
    ``reads_to_parent`` and ``temporal_steps`` with
    ``key -> (dims, isl.PwQPolynomial)`` entries. The remaining attributes
    default to empty dicts -- presumably filled elsewhere; confirm.
    """

    # Operation counts.
    ops: dict = field(default_factory=dict)
    # Fill (data-movement-in) counts.
    fills: dict = field(default_factory=dict)
    # Buffer occupancy.
    occupancy: dict = field(default_factory=dict)
    # Occupancy attributed to ops; not set by deserialize_looptree_output.
    op_occupancy: dict = field(default_factory=dict)
    # Reads served by a peer unit.
    reads_to_peer: dict = field(default_factory=dict)
    # Reads served by the parent level.
    reads_to_parent: dict = field(default_factory=dict)
    # Temporal step counts.
    temporal_steps: dict = field(default_factory=dict)
    # Spatial fanout; not set by deserialize_looptree_output.
    fanout: dict = field(default_factory=dict)
    # Operational intensity; not set by deserialize_looptree_output.
    op_intensity: dict = field(default_factory=dict)
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def deserialize_looptree_output(
    looptree_output, isl_ctx: isl.Context  #: bindings.looptree.LooptreeResult,
) -> IslReuseAnalysisOutput:
    """Parse the string-serialized polynomials in *looptree_output*.

    Each deserialized attribute maps ``k -> (dims, PwQPolynomial)``, with
    the polynomial parsed from its serialized string form in ``isl_ctx``.

    :param looptree_output: Object exposing dict attributes ``ops``,
        ``fills``, ``occupancy``, ``reads_to_peer``, ``reads_to_parent``
        and ``temporal_steps``, each mapping ``k -> (dims, serialized)``.
    :param isl_ctx: ISL context in which the polynomials are parsed.
    :returns: A populated :class:`IslReuseAnalysisOutput`.

    NOTE(review): ``op_occupancy``, ``fanout`` and ``op_intensity`` are left
    at their empty defaults here -- confirm they are filled elsewhere.
    """
    output = IslReuseAnalysisOutput()

    # All six attributes undergo the identical (dims, str) ->
    # (dims, PwQPolynomial) transformation; loop over the attribute names
    # instead of repeating the comprehension six times.
    for attr in (
        "ops",
        "fills",
        "occupancy",
        "reads_to_peer",
        "reads_to_parent",
        "temporal_steps",
    ):
        setattr(
            output,
            attr,
            {
                k: (dims, isl.PwQPolynomial.read_from_str(isl_ctx, v))
                for k, (dims, v) in getattr(looptree_output, attr).items()
            },
        )

    return output
|