accelforge 0.0.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- accelforge/__init__.py +21 -0
- accelforge/_accelerated_imports.py +16 -0
- accelforge/_deprecate/_simanneal/evalmapping.py +271 -0
- accelforge/_deprecate/_simanneal/mapspaceglobals.py +298 -0
- accelforge/_deprecate/_simanneal/simanneal.py +666 -0
- accelforge/_deprecate/_simanneal/tracking.py +105 -0
- accelforge/_deprecate/_simanneal/wrappers.py +218 -0
- accelforge/_deprecate/_simanneal2/__init__.py +7 -0
- accelforge/_deprecate/_simanneal2/simanneal.py +493 -0
- accelforge/_deprecate/_simanneal2/tracking.py +116 -0
- accelforge/_deprecate/compatibility_util.py +181 -0
- accelforge/_deprecate/layerdeduplication/__init__.py +2 -0
- accelforge/_deprecate/layerdeduplication/group_similar_einsums.py +160 -0
- accelforge/_deprecate/layerdeduplication/grouped_einsums.py +84 -0
- accelforge/_deprecate/mapping_filter_tags/__init__.py +2 -0
- accelforge/_deprecate/mapping_filter_tags/ffmt.py +212 -0
- accelforge/_deprecate/mapping_filter_tags/onesplit.py +24 -0
- accelforge/_deprecate/mapping_filter_tags/util.py +24 -0
- accelforge/_deprecate/tags.py +69 -0
- accelforge/_deprecate/viz/__init__.py +0 -0
- accelforge/_deprecate/viz/interactive.py +159 -0
- accelforge/_deprecate/viz/reservationtree.py +307 -0
- accelforge/_deprecate/viz/ski_slope.py +88 -0
- accelforge/_version.py +15 -0
- accelforge/examples.py +39 -0
- accelforge/frontend/__init__.py +10 -0
- accelforge/frontend/_binding.py +129 -0
- accelforge/frontend/_workload_isl/__init__.py +2 -0
- accelforge/frontend/_workload_isl/_isl.py +149 -0
- accelforge/frontend/_workload_isl/_symbolic.py +141 -0
- accelforge/frontend/arch copy.py +1544 -0
- accelforge/frontend/arch.py +1642 -0
- accelforge/frontend/config.py +63 -0
- accelforge/frontend/mapper/__init__.py +5 -0
- accelforge/frontend/mapper/ffm.py +126 -0
- accelforge/frontend/mapper/mapper.py +7 -0
- accelforge/frontend/mapper/metrics.py +30 -0
- accelforge/frontend/mapping/__init__.py +1 -0
- accelforge/frontend/mapping/mapping.py +1736 -0
- accelforge/frontend/model.py +14 -0
- accelforge/frontend/renames.py +150 -0
- accelforge/frontend/spec copy.py +230 -0
- accelforge/frontend/spec.py +301 -0
- accelforge/frontend/variables.py +12 -0
- accelforge/frontend/workload.py +952 -0
- accelforge/mapper/FFM/__init__.py +9 -0
- accelforge/mapper/FFM/_join_pmappings/__init__.py +0 -0
- accelforge/mapper/FFM/_join_pmappings/compatibility.py +653 -0
- accelforge/mapper/FFM/_join_pmappings/compress_pmappings.py +140 -0
- accelforge/mapper/FFM/_join_pmappings/join_pmappings.py +703 -0
- accelforge/mapper/FFM/_join_pmappings/pmapping_dataframe.py +901 -0
- accelforge/mapper/FFM/_join_pmappings/pmapping_group.py +337 -0
- accelforge/mapper/FFM/_make_pmappings/contraints/__init__.py +0 -0
- accelforge/mapper/FFM/_make_pmappings/contraints/constraints.py +360 -0
- accelforge/mapper/FFM/_make_pmappings/make_pmapping_templates/__init__.py +1 -0
- accelforge/mapper/FFM/_make_pmappings/make_pmapping_templates/make_loops.py +373 -0
- accelforge/mapper/FFM/_make_pmappings/make_pmapping_templates/make_pmapping_templates.py +463 -0
- accelforge/mapper/FFM/_make_pmappings/make_pmapping_templates/make_reservations.py +95 -0
- accelforge/mapper/FFM/_make_pmappings/make_pmapping_templates/make_storage_order.py +382 -0
- accelforge/mapper/FFM/_make_pmappings/make_pmapping_templates/make_storages.py +155 -0
- accelforge/mapper/FFM/_make_pmappings/make_pmappings.py +411 -0
- accelforge/mapper/FFM/_make_pmappings/make_pmappings_from_templates/__init__.py +1 -0
- accelforge/mapper/FFM/_make_pmappings/make_pmappings_from_templates/make_pmappings_from_templates.py +407 -0
- accelforge/mapper/FFM/_make_pmappings/make_pmappings_from_templates/make_tile_shapes.py +1681 -0
- accelforge/mapper/FFM/_make_pmappings/make_pmappings_from_templates/run_model.py +170 -0
- accelforge/mapper/FFM/_make_pmappings/make_pmappings_from_templates/symbol_relations.py +174 -0
- accelforge/mapper/FFM/_make_pmappings/pmapper_job.py +282 -0
- accelforge/mapper/FFM/_pareto_df/df_convention.py +273 -0
- accelforge/mapper/FFM/_pareto_df/pareto copy.py +836 -0
- accelforge/mapper/FFM/_pareto_df/pareto.py +508 -0
- accelforge/mapper/FFM/data.py +61 -0
- accelforge/mapper/FFM/main copy.py +236 -0
- accelforge/mapper/FFM/main.py +208 -0
- accelforge/mapper/FFM/mappings.py +510 -0
- accelforge/mapper/FFM/pmappings.py +310 -0
- accelforge/mapper/__init__.py +4 -0
- accelforge/mapper.py +0 -0
- accelforge/model/__init__.py +1 -0
- accelforge/model/_looptree/__init__.py +0 -0
- accelforge/model/_looptree/accesses.py +335 -0
- accelforge/model/_looptree/capacity/__init__.py +1 -0
- accelforge/model/_looptree/capacity/aggregators.py +36 -0
- accelforge/model/_looptree/capacity/capacity.py +47 -0
- accelforge/model/_looptree/energy.py +150 -0
- accelforge/model/_looptree/equivalent_ranks.py +29 -0
- accelforge/model/_looptree/latency/__init__.py +1 -0
- accelforge/model/_looptree/latency/latency.py +98 -0
- accelforge/model/_looptree/latency/memory.py +120 -0
- accelforge/model/_looptree/latency/processors.py +92 -0
- accelforge/model/_looptree/mapping_utilities.py +71 -0
- accelforge/model/_looptree/reuse/__init__.py +4 -0
- accelforge/model/_looptree/reuse/isl/__init__.py +1 -0
- accelforge/model/_looptree/reuse/isl/des.py +59 -0
- accelforge/model/_looptree/reuse/isl/isl_functions.py +374 -0
- accelforge/model/_looptree/reuse/isl/mapping_to_isl/__init__.py +4 -0
- accelforge/model/_looptree/reuse/isl/mapping_to_isl/analyze_mapping.py +297 -0
- accelforge/model/_looptree/reuse/isl/mapping_to_isl/skews_from_mapping.py +236 -0
- accelforge/model/_looptree/reuse/isl/mapping_to_isl/tiling.py +685 -0
- accelforge/model/_looptree/reuse/isl/mapping_to_isl/types.py +188 -0
- accelforge/model/_looptree/reuse/isl/spatial.py +260 -0
- accelforge/model/_looptree/reuse/isl/temporal.py +182 -0
- accelforge/model/_looptree/reuse/symbolic/__init__.py +1 -0
- accelforge/model/_looptree/reuse/symbolic/symbolic copy 2.py +1346 -0
- accelforge/model/_looptree/reuse/symbolic/symbolic copy.py +1408 -0
- accelforge/model/_looptree/reuse/symbolic/symbolic.py +1396 -0
- accelforge/model/_looptree/run.py +122 -0
- accelforge/model/_looptree/types.py +26 -0
- accelforge/model/_looptree/visualization/__init__.py +0 -0
- accelforge/model/_looptree/visualization/occupancy.py +11 -0
- accelforge/model/main.py +222 -0
- accelforge/plotting/__init__.py +2 -0
- accelforge/plotting/mappings.py +219 -0
- accelforge/plotting/specs.py +57 -0
- accelforge/util/__init__.py +4 -0
- accelforge/util/_base_analysis_types.py +24 -0
- accelforge/util/_basetypes.py +1089 -0
- accelforge/util/_frozenset.py +36 -0
- accelforge/util/_isl.py +29 -0
- accelforge/util/_itertools.py +14 -0
- accelforge/util/_mathfuncs.py +57 -0
- accelforge/util/_parse_expressions.py +339 -0
- accelforge/util/_picklecache.py +32 -0
- accelforge/util/_setexpressions.py +268 -0
- accelforge/util/_sympy/__init__.py +0 -0
- accelforge/util/_sympy/broadcast_max.py +18 -0
- accelforge/util/_visualization.py +112 -0
- accelforge/util/_yaml.py +579 -0
- accelforge/util/parallel.py +193 -0
- accelforge-0.0.1.dist-info/METADATA +64 -0
- accelforge-0.0.1.dist-info/RECORD +258 -0
- accelforge-0.0.1.dist-info/WHEEL +5 -0
- accelforge-0.0.1.dist-info/licenses/LICENSE +19 -0
- accelforge-0.0.1.dist-info/top_level.txt +5 -0
- docs/_build/html/_sources/fastfusion.frontend.mapper.rst.txt +37 -0
- docs/_build/html/_sources/fastfusion.frontend.rst.txt +70 -0
- docs/_build/html/_sources/fastfusion.frontend.workload.rst.txt +21 -0
- docs/_build/html/_sources/fastfusion.mapper.FFM.rst.txt +37 -0
- docs/_build/html/_sources/fastfusion.mapper.rst.txt +18 -0
- docs/_build/html/_sources/fastfusion.rst.txt +20 -0
- docs/_build/html/_sources/fastfusion.util.rst.txt +21 -0
- docs/_build/html/_sources/index.rst.txt +87 -0
- docs/_build/html/_sources/modules.rst.txt +7 -0
- docs/_build/html/_sources/notes/citation.rst.txt +45 -0
- docs/_build/html/_sources/notes/definitions.rst.txt +43 -0
- docs/_build/html/_sources/notes/faqs.rst.txt +39 -0
- docs/_build/html/_sources/notes/modeling/accelerator_energy_latency.rst.txt +72 -0
- docs/_build/html/_sources/notes/modeling/component_energy_area.rst.txt +96 -0
- docs/_build/html/_sources/notes/modeling/mapping.rst.txt +100 -0
- docs/_build/html/_sources/notes/modeling.rst.txt +33 -0
- docs/_build/html/_sources/notes/parsing/arithmetic_parsing.rst.txt +136 -0
- docs/_build/html/_sources/notes/parsing/setexpressions.rst.txt +63 -0
- docs/_build/html/_sources/notes/parsing/yaml_parsing.rst.txt +176 -0
- docs/_build/html/_sources/notes/quickstart_and_installation.rst.txt +9 -0
- docs/_build/html/_sources/notes/spec/architecture.rst.txt +133 -0
- docs/_build/html/_sources/notes/spec/mapping.rst.txt +12 -0
- docs/_build/html/_sources/notes/spec/workload.rst.txt +83 -0
- docs/_build/html/_sources/notes/spec.rst.txt +36 -0
- docs/source/_ext/include_attrs.py +213 -0
- docs/source/_ext/include_docstring.py +364 -0
- docs/source/_ext/include_functions.py +154 -0
- docs/source/_ext/include_notebook.py +131 -0
- docs/source/_ext/include_yaml.py +119 -0
- docs/source/_ext/inherited_attributes.py +222 -0
- docs/source/_ext/paths.py +4 -0
- docs/source/conf.py +79 -0
- examples/arches/compute_in_memory/_include.yaml +74 -0
- examples/arches/compute_in_memory/_include_functions.py +229 -0
- examples/arches/compute_in_memory/_load_spec.py +57 -0
- examples/arches/compute_in_memory/components/c2c_multiplier.py +181 -0
- examples/arches/compute_in_memory/components/dac_c2c_r2r.py +605 -0
- examples/arches/compute_in_memory/components/misc.py +195 -0
- examples/arches/compute_in_memory/components/util/bit_functions.py +51 -0
- examples/arches/compute_in_memory/components/zero_comparator.py +92 -0
- examples/arches/compute_in_memory/isaac.yaml +233 -0
- examples/arches/compute_in_memory/memory_cells/ecram_demo.yaml +63 -0
- examples/arches/compute_in_memory/memory_cells/rram_example.yaml +63 -0
- examples/arches/compute_in_memory/memory_cells/rram_isaac_isca_2016.yaml +64 -0
- examples/arches/compute_in_memory/memory_cells/rram_neurosim_default.yaml +63 -0
- examples/arches/compute_in_memory/memory_cells/rram_raella_isca_2023.yaml +70 -0
- examples/arches/compute_in_memory/memory_cells/rram_wan_nature_2022.yaml +63 -0
- examples/arches/compute_in_memory/memory_cells/sram_colonnade_jssc_2021.yaml +63 -0
- examples/arches/compute_in_memory/memory_cells/sram_example.yaml +63 -0
- examples/arches/compute_in_memory/memory_cells/sram_jia_jssc_2020.yaml +63 -0
- examples/arches/compute_in_memory/memory_cells/sram_sinangil_jssc_2021.yaml +63 -0
- examples/arches/compute_in_memory/memory_cells/sram_wang_vlsi_2022.yaml +63 -0
- examples/arches/compute_in_memory/wang_vlsi_2022.yaml +289 -0
- examples/arches/eyeriss.yaml +68 -0
- examples/arches/fanout_variations/at_glb.yaml +31 -0
- examples/arches/fanout_variations/at_glb_with_fanout_node.yaml +34 -0
- examples/arches/fanout_variations/at_mac.yaml +31 -0
- examples/arches/fanout_variations/at_mac_with_constraints.yaml +38 -0
- examples/arches/fanout_variations/at_mac_with_fanout_node.yaml +34 -0
- examples/arches/nvdla.yaml +47 -0
- examples/arches/simple.yaml +28 -0
- examples/arches/tpu_v4i.yaml +67 -0
- examples/mappings/unfused_matmuls_to_simple.yaml +33 -0
- examples/misc/component_annotated.yaml +33 -0
- examples/workloads/gpt3_6.7B.yaml +124 -0
- examples/workloads/matmuls.yaml +20 -0
- examples/workloads/mobilenet_28.yaml +81 -0
- examples/workloads/mobilenet_various_separate.yaml +106 -0
- examples/workloads/three_matmuls_annotated.yaml +59 -0
- notebooks/.ipynb_checkpoints/fastfusion_arch_study_michael-checkpoint.ipynb +359 -0
- notebooks/compute_in_memory/_scripts.py +339 -0
- notebooks/compute_in_memory/isaac.guide.ipynb +270 -0
- notebooks/compute_in_memory/wang_vlsi_2022.ipynb +602 -0
- notebooks/paths.py +4 -0
- notebooks/tutorials/.ipynb_checkpoints/1_FFM-checkpoint.ipynb +3110 -0
- notebooks/tutorials/FFM.ipynb +3498 -0
- notebooks/tutorials/_include.py +48 -0
- notebooks/tutorials/component_energy_area.ipynb +363 -0
- tests/Q_mapping.yaml +38 -0
- tests/__init__.py +0 -0
- tests/conv.mapping.yaml +27 -0
- tests/conv.workload.yaml +13 -0
- tests/conv_sym.mapping.yaml +43 -0
- tests/copy.mapping.yaml +35 -0
- tests/copy.workload.yaml +15 -0
- tests/distribuffers/__init__.py +0 -0
- tests/distribuffers/multicast/test_cases.yaml +482 -0
- tests/distribuffers/spec/binding/valid_bindings.yaml +97 -0
- tests/distribuffers/spec/distributed.yaml +100 -0
- tests/distribuffers/spec/logical_arch.yaml +32 -0
- tests/distribuffers/spec/physical_arch.yaml +69 -0
- tests/distribuffers/test_binding.py +48 -0
- tests/frontend/__init__.py +0 -0
- tests/frontend/test_mapping_viz.py +52 -0
- tests/mapper/__init__.py +0 -0
- tests/mapper/configs/conv1d/conv1d.mapping.yaml +31 -0
- tests/mapper/configs/conv1d/conv1d.workload.yaml +11 -0
- tests/mapper/configs/two_conv1d/two_conv1d.expected.yaml +38 -0
- tests/mapper/configs/two_conv1d/two_conv1d.mapping.yaml +54 -0
- tests/mapper/configs/two_conv1d/two_conv1d.workload.yaml +19 -0
- tests/mapper/test_mapping_to_isl.py +90 -0
- tests/mapper/test_spatial_reuse_analysis.py +67 -0
- tests/mapper/test_temporal_reuse_analysis.py +56 -0
- tests/mapper/util.py +58 -0
- tests/matmul.mapping.yaml +29 -0
- tests/matmul.workload.yaml +12 -0
- tests/matmul_spatial.mapping.yaml +44 -0
- tests/mha.renames.yaml +65 -0
- tests/mha.workload.yaml +67 -0
- tests/mha.yaml +59 -0
- tests/mha_full.workload.yaml +67 -0
- tests/mobilenet.workload.yaml +35 -0
- tests/mobilenet_long.workload.yaml +64 -0
- tests/pmappingcache.py +24 -0
- tests/processing_stage.arch.yaml +40 -0
- tests/snowcat.arch.yaml +36 -0
- tests/test_ffm_join_pmappings.py +106 -0
- tests/test_ffm_make_pmappings.py +82 -0
- tests/test_ffm_make_tile_shapes.py +49 -0
- tests/test_mapper.py +100 -0
- tests/test_model.py +37 -0
- tests/test_plotting.py +72 -0
- tests/test_processing_stage.py +46 -0
- tests/test_symbolic_model.py +248 -0
- tests/test_workload.py +141 -0
accelforge/mapper/FFM/_make_pmappings/make_pmappings_from_templates/make_pmappings_from_templates.py
ADDED
|
@@ -0,0 +1,407 @@
|
|
|
1
|
+
import copy
|
|
2
|
+
import math
|
|
3
|
+
import pandas as pd
|
|
4
|
+
from uuid import UUID
|
|
5
|
+
from collections import defaultdict
|
|
6
|
+
|
|
7
|
+
import sympy
|
|
8
|
+
from accelforge.frontend.mapping import Loop, Mapping, Spatial, Temporal
|
|
9
|
+
from accelforge.frontend.workload import EinsumName
|
|
10
|
+
from accelforge.mapper.FFM._join_pmappings.compatibility import (
|
|
11
|
+
Compatibility,
|
|
12
|
+
)
|
|
13
|
+
from accelforge.mapper.FFM._join_pmappings.pmapping_dataframe import (
|
|
14
|
+
MAPPING_COLUMN,
|
|
15
|
+
PmappingDataframe,
|
|
16
|
+
col2nameloop,
|
|
17
|
+
col_used_in_pareto,
|
|
18
|
+
is_reservation_col,
|
|
19
|
+
makepareto,
|
|
20
|
+
tensor2col,
|
|
21
|
+
col2nameloop,
|
|
22
|
+
is_reservation_col,
|
|
23
|
+
nameloop2col,
|
|
24
|
+
)
|
|
25
|
+
|
|
26
|
+
from accelforge.frontend.mapper.metrics import Metrics
|
|
27
|
+
from accelforge.mapper.FFM._make_pmappings.make_pmappings_from_templates.make_tile_shapes import (
|
|
28
|
+
make_tile_shapes,
|
|
29
|
+
IMPERFECT,
|
|
30
|
+
)
|
|
31
|
+
from accelforge.mapper.FFM._join_pmappings.pmapping_group import PmappingGroup
|
|
32
|
+
from accelforge.mapper.FFM._make_pmappings.pmapper_job import (
|
|
33
|
+
Job,
|
|
34
|
+
SameCompatibilityJobs,
|
|
35
|
+
)
|
|
36
|
+
from accelforge.mapper.FFM._pareto_df.df_convention import (
|
|
37
|
+
is_fused_loop_col,
|
|
38
|
+
is_n_iterations_col,
|
|
39
|
+
)
|
|
40
|
+
from accelforge.util._mathfuncs import _count_factorizations
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
def shift_reservations_by_null_loop_indices(
    mappings: pd.DataFrame, null_loop_indices: set[int]
):
    """Renumber reservation columns after some loop indices are removed.

    The frame is modified in place and also returned.

    Each reservation column decodes (via ``col2nameloop``) to a
    ``(name, above)`` pair. ``above`` is lowered by the number of entries in
    ``null_loop_indices`` that are strictly smaller than it, and the column is
    renamed accordingly. When several columns of the same ``name`` collapse
    onto the same new index, only the one with the largest original ``above``
    is kept; the others are dropped.

    Args:
        mappings: Frame whose reservation columns should be renumbered.
        null_loop_indices: Loop indices being removed. It is only iterated,
            so any iterable of ints works (the caller in this file passes a
            tuple).

    Returns:
        The same ``mappings`` object, after the in-place drop/rename.

    Raises:
        ValueError: If the resulting column labels are not unique.
    """
    # Maps each renumbered column label to the (name, above) pair of the
    # original column that will take that label.
    target2newabovename = {}
    dropcols = []
    for c in mappings.columns:
        if not is_reservation_col(c):
            continue
        name, above = col2nameloop(c)
        # Every removed index strictly below `above` shifts it down by one.
        new_above = above - sum(above > i for i in null_loop_indices)
        target = nameloop2col(name, new_above)
        kept = target2newabovename.get(target)
        if kept is None:
            target2newabovename[target] = (name, above)
        elif above > kept[1]:
            # The current column wins; discard the previously kept one.
            dropcols.append(nameloop2col(*kept))
            target2newabovename[target] = (name, above)
        else:
            dropcols.append(c)

    mappings.drop(columns=dropcols, inplace=True)
    renames = {
        nameloop2col(name, above): target
        for target, (name, above) in target2newabovename.items()
    }
    mappings.rename(columns=renames, inplace=True)
    # Previously this path re-ran the function on a deep copy of the input,
    # which reproduced the same duplicates and recursed until RecursionError,
    # so the ValueError below was never raised.
    if not mappings.columns.is_unique:
        raise ValueError(f"Duplicate columns: {mappings.columns}")
    return mappings
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
def get_equivalent_pmappings(
    pmapping_group: PmappingGroup, reservation_levels: set[int]
) -> list[PmappingGroup]:
    """Return one ``PmappingGroup`` per equivalent permutation of the group.

    The permutations come from
    ``pmapping_group.compatibility.make_equivalent_permutations``; each
    returned group wraps one of them and carries ``None`` as its second
    constructor argument.
    """
    source_compatibility = pmapping_group.compatibility
    permuted_compatibilities = source_compatibility.make_equivalent_permutations(
        reservation_levels
    )
    equivalents = []
    for permuted in permuted_compatibilities:
        equivalents.append(PmappingGroup(permuted, None))
    return equivalents
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
def mapping2fused_loop_cols(mapping: Mapping, einsum_name: EinsumName):
    """Collect the tile-shape entries of every fused loop in ``mapping``.

    For each node that is a ``Loop`` with a truthy ``_fused`` attribute:

    * if ``tile_shape`` is set, that single value is collected;
    * otherwise, if ``tile_pattern`` is set, its ``tile_shape`` and
      ``initial_tile_shape`` are collected, in that order.

    String entries are returned prefixed with ``"{einsum_name}<SEP>"``;
    non-string entries are returned unchanged.

    Raises:
        ValueError: If a fused loop has neither a tile shape nor a tile
            pattern.
    """

    def qualify(entry):
        # Only string entries get the Einsum prefix.
        if isinstance(entry, str):
            return f"{einsum_name}<SEP>{entry}"
        return entry

    collected = []
    for node in mapping.nodes:
        if not (isinstance(node, Loop) and node._fused):
            continue
        if node.tile_shape is not None:
            collected.append(node.tile_shape)
            continue
        if node.tile_pattern is None:
            raise ValueError(f"Can't find tile shape or tile pattern for loop {node}")
        collected.append(node.tile_pattern.tile_shape)
        collected.append(node.tile_pattern.initial_tile_shape)
    return list(map(qualify, collected))
|
|
97
|
+
|
|
98
|
+
|
|
99
|
+
def get_fused_loop_indices(
    df: pd.DataFrame,
    compatibility: Compatibility,
    einsum_name: EinsumName,
    return_as_int: bool = False,
) -> pd.Series | int | list[bool]:
    """Report which loops of ``compatibility`` have an iteration count != 1.

    For every loop, ``tile_pattern.calculated_n_iterations`` is read. A string
    or ``sympy.Symbol`` is treated as the name of a column of ``df`` (prefixed
    with ``"{einsum_name}<SEP>"``) and compared row-wise with 1; any other
    value is compared with 1 directly, giving a single flag.

    Args:
        df: Frame holding the per-pmapping iteration-count columns.
        compatibility: Object whose ``loops`` are inspected.
        einsum_name: Prefix used to build the column names.
        return_as_int: Selects the return format, see below.

    Returns:
        With ``return_as_int=True``: the flags packed as a binary number, the
        first loop being the most significant bit. This is a ``pd.Series``
        (one value per row) when at least one flag came from a column, and a
        plain ``int`` otherwise.
        With ``return_as_int=False``: a list with one boolean per loop.

    Raises:
        ValueError: With ``return_as_int=False``, if a column-backed flag is
            not the same for every row of ``df``.
    """
    flags = []
    for loop in compatibility.loops:
        col = loop.tile_pattern.calculated_n_iterations
        assert col is not None, f"Loop {loop} has no calculated n_iterations"
        if isinstance(col, str):
            col = df[f"{einsum_name}<SEP>{col}"]
        elif isinstance(col, sympy.Symbol):
            col = df[f"{einsum_name}<SEP>{col.name}"]
        flags.append(col != 1)

    if return_as_int:
        n = 0
        for b in flags:
            n = n * 2 + b
        return n

    r2 = []
    for b in flags:
        if not isinstance(b, pd.Series):
            # The iteration count was a constant rather than a column, so the
            # flag is already a scalar; it has no .unique() / .iloc.
            r2.append(bool(b))
            continue
        if len(b.unique()) > 1:
            raise ValueError("This won't work if there's more than one")
        r2.append(b.iloc[0])
    return r2
|
|
129
|
+
|
|
130
|
+
|
|
131
|
+
def _count_loops(
    job: Job,
) -> tuple[list[int], list[int], dict[str, int], dict[str, int]]:
    """Count runs of consecutive loops in ``job.mapping.nodes``.

    A run is a maximal sequence of consecutive ``Loop`` nodes that share the
    same spatial dimension, where the dimension is ``node.name`` for
    ``Spatial`` nodes and ``None`` for everything else. A run ends at any
    non-``Loop`` node or when the spatial dimension changes.

    Returns:
        A 4-tuple ``(temporal_n_loops, spatial_n_loops, rv_spatial_count,
        rv_temporal_count)``:

        * ``temporal_n_loops``: length of each run whose dimension is ``None``.
        * ``spatial_n_loops``: length of each run with a spatial dimension.
        * ``rv_spatial_count``: number of ``Spatial`` nodes per
          ``rank_variable``.
        * ``rv_temporal_count``: number of ``Temporal`` nodes per
          ``rank_variable``.

        Both count mappings are ``defaultdict(int)`` instances.
    """
    nodes = job.mapping.nodes
    temporal_n_loops = []
    spatial_n_loops = []
    rv_spatial_count = defaultdict(int)
    rv_temporal_count = defaultdict(int)
    cur_n_loops = 0
    spatial_dim = None

    def pop_loop():
        # Close the current run: record its length in the list matching the
        # dimension it was opened under, then start counting afresh.
        nonlocal cur_n_loops
        if cur_n_loops >= 1:
            if spatial_dim is not None:
                spatial_n_loops.append(cur_n_loops)
            else:
                temporal_n_loops.append(cur_n_loops)
        cur_n_loops = 0

    for node in nodes:
        cur_spatial_dim = None
        if isinstance(node, Spatial):
            cur_spatial_dim = node.name
            rv_spatial_count[node.rank_variable] += 1
        if cur_spatial_dim != spatial_dim:
            # pop_loop reads the *old* spatial_dim, so close the run before
            # switching to the new dimension.
            pop_loop()
            spatial_dim = cur_spatial_dim
        if isinstance(node, Loop):
            cur_n_loops += 1
            if isinstance(node, Temporal):
                rv_temporal_count[node.rank_variable] += 1
        else:
            pop_loop()
    # Flush a run that reaches the end of the node list.
    pop_loop()
    return temporal_n_loops, spatial_n_loops, rv_spatial_count, rv_temporal_count
|
|
165
|
+
|
|
166
|
+
|
|
167
|
+
def multiply_n_pmappings_by_permutations(n_pmappings: int, job: Job) -> int:
    """Compute a pmapping count for ``job`` from its loop structure.

    The count is the product, over every rank variable in
    ``job.rank_variable_bounds``, of ``_count_factorizations(bound, n_loops)``
    where ``n_loops`` is the number of spatial plus temporal loops counted for
    that rank variable. The private option
    ``job.spec.mapper.ffm._count_option_for_mapsapce_size_evaluation`` is
    tested with ``in`` for three markers:

    * ``"non_helpful_tile_shapes"``: every rank variable is given as many
      temporal loops as there are temporal loop runs.
    * ``"non_helpful_loops_for_loop_orders"``: every temporal run is treated
      as having one loop per rank variable.
    * ``"redundant_loop_orders"``: the count is scaled by the number of
      temporal loop orders divided by ``job.mapping._n_loop_orders``, so that
      the orders that are actually evaluated are not double counted.

    NOTE(review): ``n_pmappings`` is not read anywhere in this function, and
    with ``"redundant_loop_orders"`` the true division makes the result a
    float despite the ``int`` annotation. Confirm both are intended.
    """
    count_option = job.spec.mapper.ffm._count_option_for_mapsapce_size_evaluation
    # if option == "normal":
    #     return n_pmappings

    temporal_runs, _spatial_runs, spatial_per_rank, temporal_per_rank = _count_loops(
        job
    )

    bounds = dict(job.rank_variable_bounds.items())

    if "non_helpful_tile_shapes" in count_option:
        temporal_per_rank = dict.fromkeys(bounds, len(temporal_runs))

    if "non_helpful_loops_for_loop_orders" in count_option:
        temporal_runs[:] = [len(bounds)] * len(temporal_runs)

    # Count number of tile shapes
    loops_per_rank = {
        rank: spatial_per_rank[rank] + temporal_per_rank[rank] for rank in bounds
    }
    factorization_counts = [
        _count_factorizations(bound, loops_per_rank[rank], imperfect=IMPERFECT)
        for rank, bound in bounds.items()
    ]
    mapspace_size = math.prod(factorization_counts)
    temporal_loop_orders = math.prod(map(math.factorial, temporal_runs))

    # assert n >= n_pmappings, f"n_pmappings: {n_pmappings} > n: {n}"

    if "redundant_loop_orders" not in count_option:
        return mapspace_size

    # job.mapping._n_loop_orders is the number of permutations that we actually
    # evaluate. Don't want to double count them.
    return mapspace_size * (temporal_loop_orders / job.mapping._n_loop_orders)
|
|
205
|
+
|
|
206
|
+
|
|
207
|
+
def assert_all_jobs_have_same_symbols(
    jobs_with_similar_compatibilities: SameCompatibilityJobs,
):
    """Assert that loops at the same position share one n_iterations value.

    Every job's ``compatibility.tensors`` is walked, and for each loop
    position the distinct ``tile_pattern.calculated_n_iterations`` values are
    gathered across all tensors of all jobs. The assertion fails if any
    position ends up with more than one distinct value.
    """
    symbols_at_position = {}
    for job in jobs_with_similar_compatibilities:
        for tensor in job.compatibility.tensors:
            for position, loop in enumerate(tensor.loops):
                seen = symbols_at_position.setdefault(position, set())
                seen.add(loop.tile_pattern.calculated_n_iterations)

    single_symbol_everywhere = all(
        len(seen) == 1 for seen in symbols_at_position.values()
    )
    assert (
        single_symbol_everywhere
    ), "All jobs must have the same symbols for compatibility n_iterations"
|
|
220
|
+
|
|
221
|
+
|
|
222
|
+
def _raise_if_negative_energy(mappings: pd.DataFrame) -> None:
    """Raise ``RuntimeError`` listing every row with a negative energy value.

    Only columns whose label contains ``"Total<SEP>energy"`` are checked.
    """
    energy_cols = [c for c in mappings.columns if "Total<SEP>energy" in c]
    negative = mappings[energy_cols] < 0
    if not negative.any(axis=None):
        return
    offending = mappings[negative.any(axis=1)]
    # One "key: value" line per column, with a blank line after each row.
    msg = "".join(
        "".join(f"{k}: {v}\n" for k, v in row.items()) + "\n"
        for _, row in offending.iterrows()
    )
    raise RuntimeError(f"negative energy:\n{msg}")


def make_pmappings_from_templates(
    jobs_with_similar_compatibilities: SameCompatibilityJobs,
) -> tuple[EinsumName, list[PmappingGroup], dict[UUID, Mapping], SameCompatibilityJobs]:
    """Turn a set of same-compatibility jobs into Pareto-pruned pmapping groups.

    Steps, in order:

    1. For every job, run ``make_tile_shapes``, replace the job's
       compatibility with ``populate_loops()``, overwrite
       ``job.n_total_pmappings`` with the value from
       ``multiply_n_pmappings_by_permutations``, tag the result with the job
       id, and drop reservation columns of resources listed in
       ``job.memories_track_pmappings_only``.
    2. Concatenate all per-job frames, prefix the columns that are neither
       used in the Pareto comparison nor fusable-tensor columns with
       ``"{einsum_name}<SEP>"``, and Pareto-prune, split by the fused-loop
       columns.
    3. Group the surviving rows by which loops have more than one iteration.
       For each group, drop the loops with a single iteration from the
       compatibility, rename the fused-loop symbols, renumber the reservation
       columns, and wrap the result in a ``PmappingGroup``.

    Returns:
        ``(einsum_name, pmapping_groups, pmapping_objects,
        jobs_with_similar_compatibilities)``, where ``pmapping_objects`` maps
        the id of every job that survived pruning to its mapping. If the
        concatenated frame is empty, the groups and objects are empty.

    Raises:
        RuntimeError: If any surviving pmapping has a negative total energy.
        Exception: Anything raised by ``make_tile_shapes`` is re-raised with
            a note naming the Einsum and the job's compatibility.
    """
    jwsc = jobs_with_similar_compatibilities

    results = []

    for job in jobs_with_similar_compatibilities:
        try:
            # The second return value is currently unused.
            result, _tensor2mapping = make_tile_shapes(job)
        except Exception as e:
            e.add_note(f"Einsum {jwsc.einsum_name} compatibility {job.compatibility}")
            raise
        job.compatibility = job.compatibility.populate_loops()

        # Ctrl-F for CONTIGUOUS_ITERATION_SPACE_DISCUSSION TODO: Turn tensor2pmapping
        # into per-tensor compatibility

        # This changes the pmapping count to include superfluous permutations
        # TODO: Add a multiplier for the permutations that we include in the fusion
        # piece, which are NOT known to be superfluous
        job.n_total_pmappings = multiply_n_pmappings_by_permutations(
            job.n_total_pmappings, job
        )

        result[MAPPING_COLUMN] = job.job_id
        cols_to_drop = []
        for col in result.columns:
            if is_reservation_col(col):
                resource = col2nameloop(col)[0]
                if resource in job.memories_track_pmappings_only:
                    cols_to_drop.append(col)
                assert resource not in job.ignored_resources, "Should have been ignored"
        result.drop(columns=cols_to_drop, inplace=True)
        results.append(result)

    fusable_tensors = jwsc.fusable_tensors
    einsum_name = jwsc.einsum_name
    compatibility = jwsc.compatibility

    # Creating a PmappingDataframe fills in reservation columns since different pmappings
    # have different ones.
    next_shared_loop_index = compatibility.n_loops - 1
    df = PmappingDataframe.concat(
        [
            PmappingDataframe(
                r,
                skip_pareto=True,
                next_shared_loop_index=next_shared_loop_index,
                n_total_pmappings=1,  # Unused for now, just making an initial Pareto
                n_valid_pmappings=1,  # Unused for now, just making an initial Pareto
                # NOTE(review): `job` is the last job of the loop above, so
                # every frame gets that job's ignored_resources. Presumably
                # they are identical across jobs; confirm.
                ignored_resources=job.ignored_resources,
                # False because we may have lifetimes that stretch through this Einsum
                # due to data dependencies, not loops
                limit_capacity_drop_valid_reservations=False,
            )
            for r in results
        ],
        skip_pareto=True,
    ).data
    if df.empty:
        return einsum_name, [], {}, jobs_with_similar_compatibilities

    tensor_cols = [tensor2col(tensor) for tensor in fusable_tensors]
    df.columns = [
        c if col_used_in_pareto(c) or c in tensor_cols else f"{einsum_name}<SEP>{c}"
        for c in df.columns
    ]

    fused_loop_cols = [
        f"{einsum_name}<SEP>{c}"
        for c in compatibility.symbols()
        if not is_n_iterations_col(c)
    ]

    job0 = next(iter(jobs_with_similar_compatibilities))

    # Pareto prune
    df = makepareto(df, split_by_cols=fused_loop_cols).copy()

    # A set gives constant-time membership tests in the filter below.
    jobs_passed_pareto = set(df[f"{einsum_name}<SEP>{MAPPING_COLUMN}"].unique())
    pmapping_objects = {
        job.job_id: job.mapping
        for job in jobs_with_similar_compatibilities
        if job.job_id in jobs_passed_pareto
    }

    assert_all_jobs_have_same_symbols(jobs_with_similar_compatibilities)
    # Otherwise, following logic fails

    df["fused_loop_indices"] = get_fused_loop_indices(
        df, job0.compatibility, einsum_name, return_as_int=True
    )
    groups = list(df.groupby(["fused_loop_indices"]))
    # The job totals are split evenly across the groups.
    n_jobs_total = sum(j.n_total_pmappings for j in jobs_with_similar_compatibilities)
    n_jobs_valid = sum(j.n_valid_pmappings for j in jobs_with_similar_compatibilities)
    total_pmappings_per_group = n_jobs_total / len(groups)
    valid_pmappings_per_group = n_jobs_valid / len(groups)

    pmapping_groups = []
    for _, mappings in groups:
        group_compatibility = jwsc.compatibility

        loop_flags = get_fused_loop_indices(
            mappings, group_compatibility, einsum_name, return_as_int=False
        )
        fused_loop_indices = [i for i, f in enumerate(loop_flags) if f]
        # Loops with a single iteration are dropped from the compatibility.
        null_loop_indices = tuple(
            i for i in range(group_compatibility.n_loops) if i not in fused_loop_indices
        )

        mappings = mappings.drop(columns=["fused_loop_indices"])

        group_compatibility = group_compatibility.drop_loop_indices(null_loop_indices)

        symbol_renames, group_compatibility = (
            group_compatibility.make_fused_loop_symbols(einsum_name)
        )
        for k, v in symbol_renames.items():
            mappings[v] = mappings[f"{einsum_name}<SEP>{k}"]
        shift_reservations_by_null_loop_indices(mappings, null_loop_indices)

        symbols = group_compatibility.symbols()
        dropcols = [
            c for c in mappings.columns if is_fused_loop_col(c) and c not in symbols
        ]
        mappings = mappings.drop(columns=dropcols)

        _raise_if_negative_energy(mappings)

        # Pareto pruning already ran above, so it is skipped here, but only
        # when dropping loops left next_shared_loop_index unchanged.
        next_shared_loop_index_this_group = group_compatibility.n_loops - 1
        partial_mappings = PmappingDataframe(
            mappings,
            next_shared_loop_index=next_shared_loop_index_this_group,
            n_total_pmappings=total_pmappings_per_group,
            n_valid_pmappings=valid_pmappings_per_group,
            skip_pareto=next_shared_loop_index_this_group == next_shared_loop_index,
            # NOTE(review): still the last job from the first loop; see the
            # note on the concat call.
            ignored_resources=job.ignored_resources,
            # False because we may have lifetimes that stretch through this Einsum
            # due to data dependencies, not loops
            limit_capacity_drop_valid_reservations=False,
        )
        pmapping_groups.append(PmappingGroup(group_compatibility, partial_mappings))

    return (
        einsum_name,
        pmapping_groups,
        pmapping_objects,
        jobs_with_similar_compatibilities,
    )
|