accelforge 0.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (258) hide show
  1. accelforge/__init__.py +21 -0
  2. accelforge/_accelerated_imports.py +16 -0
  3. accelforge/_deprecate/_simanneal/evalmapping.py +271 -0
  4. accelforge/_deprecate/_simanneal/mapspaceglobals.py +298 -0
  5. accelforge/_deprecate/_simanneal/simanneal.py +666 -0
  6. accelforge/_deprecate/_simanneal/tracking.py +105 -0
  7. accelforge/_deprecate/_simanneal/wrappers.py +218 -0
  8. accelforge/_deprecate/_simanneal2/__init__.py +7 -0
  9. accelforge/_deprecate/_simanneal2/simanneal.py +493 -0
  10. accelforge/_deprecate/_simanneal2/tracking.py +116 -0
  11. accelforge/_deprecate/compatibility_util.py +181 -0
  12. accelforge/_deprecate/layerdeduplication/__init__.py +2 -0
  13. accelforge/_deprecate/layerdeduplication/group_similar_einsums.py +160 -0
  14. accelforge/_deprecate/layerdeduplication/grouped_einsums.py +84 -0
  15. accelforge/_deprecate/mapping_filter_tags/__init__.py +2 -0
  16. accelforge/_deprecate/mapping_filter_tags/ffmt.py +212 -0
  17. accelforge/_deprecate/mapping_filter_tags/onesplit.py +24 -0
  18. accelforge/_deprecate/mapping_filter_tags/util.py +24 -0
  19. accelforge/_deprecate/tags.py +69 -0
  20. accelforge/_deprecate/viz/__init__.py +0 -0
  21. accelforge/_deprecate/viz/interactive.py +159 -0
  22. accelforge/_deprecate/viz/reservationtree.py +307 -0
  23. accelforge/_deprecate/viz/ski_slope.py +88 -0
  24. accelforge/_version.py +15 -0
  25. accelforge/examples.py +39 -0
  26. accelforge/frontend/__init__.py +10 -0
  27. accelforge/frontend/_binding.py +129 -0
  28. accelforge/frontend/_workload_isl/__init__.py +2 -0
  29. accelforge/frontend/_workload_isl/_isl.py +149 -0
  30. accelforge/frontend/_workload_isl/_symbolic.py +141 -0
  31. accelforge/frontend/arch copy.py +1544 -0
  32. accelforge/frontend/arch.py +1642 -0
  33. accelforge/frontend/config.py +63 -0
  34. accelforge/frontend/mapper/__init__.py +5 -0
  35. accelforge/frontend/mapper/ffm.py +126 -0
  36. accelforge/frontend/mapper/mapper.py +7 -0
  37. accelforge/frontend/mapper/metrics.py +30 -0
  38. accelforge/frontend/mapping/__init__.py +1 -0
  39. accelforge/frontend/mapping/mapping.py +1736 -0
  40. accelforge/frontend/model.py +14 -0
  41. accelforge/frontend/renames.py +150 -0
  42. accelforge/frontend/spec copy.py +230 -0
  43. accelforge/frontend/spec.py +301 -0
  44. accelforge/frontend/variables.py +12 -0
  45. accelforge/frontend/workload.py +952 -0
  46. accelforge/mapper/FFM/__init__.py +9 -0
  47. accelforge/mapper/FFM/_join_pmappings/__init__.py +0 -0
  48. accelforge/mapper/FFM/_join_pmappings/compatibility.py +653 -0
  49. accelforge/mapper/FFM/_join_pmappings/compress_pmappings.py +140 -0
  50. accelforge/mapper/FFM/_join_pmappings/join_pmappings.py +703 -0
  51. accelforge/mapper/FFM/_join_pmappings/pmapping_dataframe.py +901 -0
  52. accelforge/mapper/FFM/_join_pmappings/pmapping_group.py +337 -0
  53. accelforge/mapper/FFM/_make_pmappings/contraints/__init__.py +0 -0
  54. accelforge/mapper/FFM/_make_pmappings/contraints/constraints.py +360 -0
  55. accelforge/mapper/FFM/_make_pmappings/make_pmapping_templates/__init__.py +1 -0
  56. accelforge/mapper/FFM/_make_pmappings/make_pmapping_templates/make_loops.py +373 -0
  57. accelforge/mapper/FFM/_make_pmappings/make_pmapping_templates/make_pmapping_templates.py +463 -0
  58. accelforge/mapper/FFM/_make_pmappings/make_pmapping_templates/make_reservations.py +95 -0
  59. accelforge/mapper/FFM/_make_pmappings/make_pmapping_templates/make_storage_order.py +382 -0
  60. accelforge/mapper/FFM/_make_pmappings/make_pmapping_templates/make_storages.py +155 -0
  61. accelforge/mapper/FFM/_make_pmappings/make_pmappings.py +411 -0
  62. accelforge/mapper/FFM/_make_pmappings/make_pmappings_from_templates/__init__.py +1 -0
  63. accelforge/mapper/FFM/_make_pmappings/make_pmappings_from_templates/make_pmappings_from_templates.py +407 -0
  64. accelforge/mapper/FFM/_make_pmappings/make_pmappings_from_templates/make_tile_shapes.py +1681 -0
  65. accelforge/mapper/FFM/_make_pmappings/make_pmappings_from_templates/run_model.py +170 -0
  66. accelforge/mapper/FFM/_make_pmappings/make_pmappings_from_templates/symbol_relations.py +174 -0
  67. accelforge/mapper/FFM/_make_pmappings/pmapper_job.py +282 -0
  68. accelforge/mapper/FFM/_pareto_df/df_convention.py +273 -0
  69. accelforge/mapper/FFM/_pareto_df/pareto copy.py +836 -0
  70. accelforge/mapper/FFM/_pareto_df/pareto.py +508 -0
  71. accelforge/mapper/FFM/data.py +61 -0
  72. accelforge/mapper/FFM/main copy.py +236 -0
  73. accelforge/mapper/FFM/main.py +208 -0
  74. accelforge/mapper/FFM/mappings.py +510 -0
  75. accelforge/mapper/FFM/pmappings.py +310 -0
  76. accelforge/mapper/__init__.py +4 -0
  77. accelforge/mapper.py +0 -0
  78. accelforge/model/__init__.py +1 -0
  79. accelforge/model/_looptree/__init__.py +0 -0
  80. accelforge/model/_looptree/accesses.py +335 -0
  81. accelforge/model/_looptree/capacity/__init__.py +1 -0
  82. accelforge/model/_looptree/capacity/aggregators.py +36 -0
  83. accelforge/model/_looptree/capacity/capacity.py +47 -0
  84. accelforge/model/_looptree/energy.py +150 -0
  85. accelforge/model/_looptree/equivalent_ranks.py +29 -0
  86. accelforge/model/_looptree/latency/__init__.py +1 -0
  87. accelforge/model/_looptree/latency/latency.py +98 -0
  88. accelforge/model/_looptree/latency/memory.py +120 -0
  89. accelforge/model/_looptree/latency/processors.py +92 -0
  90. accelforge/model/_looptree/mapping_utilities.py +71 -0
  91. accelforge/model/_looptree/reuse/__init__.py +4 -0
  92. accelforge/model/_looptree/reuse/isl/__init__.py +1 -0
  93. accelforge/model/_looptree/reuse/isl/des.py +59 -0
  94. accelforge/model/_looptree/reuse/isl/isl_functions.py +374 -0
  95. accelforge/model/_looptree/reuse/isl/mapping_to_isl/__init__.py +4 -0
  96. accelforge/model/_looptree/reuse/isl/mapping_to_isl/analyze_mapping.py +297 -0
  97. accelforge/model/_looptree/reuse/isl/mapping_to_isl/skews_from_mapping.py +236 -0
  98. accelforge/model/_looptree/reuse/isl/mapping_to_isl/tiling.py +685 -0
  99. accelforge/model/_looptree/reuse/isl/mapping_to_isl/types.py +188 -0
  100. accelforge/model/_looptree/reuse/isl/spatial.py +260 -0
  101. accelforge/model/_looptree/reuse/isl/temporal.py +182 -0
  102. accelforge/model/_looptree/reuse/symbolic/__init__.py +1 -0
  103. accelforge/model/_looptree/reuse/symbolic/symbolic copy 2.py +1346 -0
  104. accelforge/model/_looptree/reuse/symbolic/symbolic copy.py +1408 -0
  105. accelforge/model/_looptree/reuse/symbolic/symbolic.py +1396 -0
  106. accelforge/model/_looptree/run.py +122 -0
  107. accelforge/model/_looptree/types.py +26 -0
  108. accelforge/model/_looptree/visualization/__init__.py +0 -0
  109. accelforge/model/_looptree/visualization/occupancy.py +11 -0
  110. accelforge/model/main.py +222 -0
  111. accelforge/plotting/__init__.py +2 -0
  112. accelforge/plotting/mappings.py +219 -0
  113. accelforge/plotting/specs.py +57 -0
  114. accelforge/util/__init__.py +4 -0
  115. accelforge/util/_base_analysis_types.py +24 -0
  116. accelforge/util/_basetypes.py +1089 -0
  117. accelforge/util/_frozenset.py +36 -0
  118. accelforge/util/_isl.py +29 -0
  119. accelforge/util/_itertools.py +14 -0
  120. accelforge/util/_mathfuncs.py +57 -0
  121. accelforge/util/_parse_expressions.py +339 -0
  122. accelforge/util/_picklecache.py +32 -0
  123. accelforge/util/_setexpressions.py +268 -0
  124. accelforge/util/_sympy/__init__.py +0 -0
  125. accelforge/util/_sympy/broadcast_max.py +18 -0
  126. accelforge/util/_visualization.py +112 -0
  127. accelforge/util/_yaml.py +579 -0
  128. accelforge/util/parallel.py +193 -0
  129. accelforge-0.0.1.dist-info/METADATA +64 -0
  130. accelforge-0.0.1.dist-info/RECORD +258 -0
  131. accelforge-0.0.1.dist-info/WHEEL +5 -0
  132. accelforge-0.0.1.dist-info/licenses/LICENSE +19 -0
  133. accelforge-0.0.1.dist-info/top_level.txt +5 -0
  134. docs/_build/html/_sources/fastfusion.frontend.mapper.rst.txt +37 -0
  135. docs/_build/html/_sources/fastfusion.frontend.rst.txt +70 -0
  136. docs/_build/html/_sources/fastfusion.frontend.workload.rst.txt +21 -0
  137. docs/_build/html/_sources/fastfusion.mapper.FFM.rst.txt +37 -0
  138. docs/_build/html/_sources/fastfusion.mapper.rst.txt +18 -0
  139. docs/_build/html/_sources/fastfusion.rst.txt +20 -0
  140. docs/_build/html/_sources/fastfusion.util.rst.txt +21 -0
  141. docs/_build/html/_sources/index.rst.txt +87 -0
  142. docs/_build/html/_sources/modules.rst.txt +7 -0
  143. docs/_build/html/_sources/notes/citation.rst.txt +45 -0
  144. docs/_build/html/_sources/notes/definitions.rst.txt +43 -0
  145. docs/_build/html/_sources/notes/faqs.rst.txt +39 -0
  146. docs/_build/html/_sources/notes/modeling/accelerator_energy_latency.rst.txt +72 -0
  147. docs/_build/html/_sources/notes/modeling/component_energy_area.rst.txt +96 -0
  148. docs/_build/html/_sources/notes/modeling/mapping.rst.txt +100 -0
  149. docs/_build/html/_sources/notes/modeling.rst.txt +33 -0
  150. docs/_build/html/_sources/notes/parsing/arithmetic_parsing.rst.txt +136 -0
  151. docs/_build/html/_sources/notes/parsing/setexpressions.rst.txt +63 -0
  152. docs/_build/html/_sources/notes/parsing/yaml_parsing.rst.txt +176 -0
  153. docs/_build/html/_sources/notes/quickstart_and_installation.rst.txt +9 -0
  154. docs/_build/html/_sources/notes/spec/architecture.rst.txt +133 -0
  155. docs/_build/html/_sources/notes/spec/mapping.rst.txt +12 -0
  156. docs/_build/html/_sources/notes/spec/workload.rst.txt +83 -0
  157. docs/_build/html/_sources/notes/spec.rst.txt +36 -0
  158. docs/source/_ext/include_attrs.py +213 -0
  159. docs/source/_ext/include_docstring.py +364 -0
  160. docs/source/_ext/include_functions.py +154 -0
  161. docs/source/_ext/include_notebook.py +131 -0
  162. docs/source/_ext/include_yaml.py +119 -0
  163. docs/source/_ext/inherited_attributes.py +222 -0
  164. docs/source/_ext/paths.py +4 -0
  165. docs/source/conf.py +79 -0
  166. examples/arches/compute_in_memory/_include.yaml +74 -0
  167. examples/arches/compute_in_memory/_include_functions.py +229 -0
  168. examples/arches/compute_in_memory/_load_spec.py +57 -0
  169. examples/arches/compute_in_memory/components/c2c_multiplier.py +181 -0
  170. examples/arches/compute_in_memory/components/dac_c2c_r2r.py +605 -0
  171. examples/arches/compute_in_memory/components/misc.py +195 -0
  172. examples/arches/compute_in_memory/components/util/bit_functions.py +51 -0
  173. examples/arches/compute_in_memory/components/zero_comparator.py +92 -0
  174. examples/arches/compute_in_memory/isaac.yaml +233 -0
  175. examples/arches/compute_in_memory/memory_cells/ecram_demo.yaml +63 -0
  176. examples/arches/compute_in_memory/memory_cells/rram_example.yaml +63 -0
  177. examples/arches/compute_in_memory/memory_cells/rram_isaac_isca_2016.yaml +64 -0
  178. examples/arches/compute_in_memory/memory_cells/rram_neurosim_default.yaml +63 -0
  179. examples/arches/compute_in_memory/memory_cells/rram_raella_isca_2023.yaml +70 -0
  180. examples/arches/compute_in_memory/memory_cells/rram_wan_nature_2022.yaml +63 -0
  181. examples/arches/compute_in_memory/memory_cells/sram_colonnade_jssc_2021.yaml +63 -0
  182. examples/arches/compute_in_memory/memory_cells/sram_example.yaml +63 -0
  183. examples/arches/compute_in_memory/memory_cells/sram_jia_jssc_2020.yaml +63 -0
  184. examples/arches/compute_in_memory/memory_cells/sram_sinangil_jssc_2021.yaml +63 -0
  185. examples/arches/compute_in_memory/memory_cells/sram_wang_vlsi_2022.yaml +63 -0
  186. examples/arches/compute_in_memory/wang_vlsi_2022.yaml +289 -0
  187. examples/arches/eyeriss.yaml +68 -0
  188. examples/arches/fanout_variations/at_glb.yaml +31 -0
  189. examples/arches/fanout_variations/at_glb_with_fanout_node.yaml +34 -0
  190. examples/arches/fanout_variations/at_mac.yaml +31 -0
  191. examples/arches/fanout_variations/at_mac_with_constraints.yaml +38 -0
  192. examples/arches/fanout_variations/at_mac_with_fanout_node.yaml +34 -0
  193. examples/arches/nvdla.yaml +47 -0
  194. examples/arches/simple.yaml +28 -0
  195. examples/arches/tpu_v4i.yaml +67 -0
  196. examples/mappings/unfused_matmuls_to_simple.yaml +33 -0
  197. examples/misc/component_annotated.yaml +33 -0
  198. examples/workloads/gpt3_6.7B.yaml +124 -0
  199. examples/workloads/matmuls.yaml +20 -0
  200. examples/workloads/mobilenet_28.yaml +81 -0
  201. examples/workloads/mobilenet_various_separate.yaml +106 -0
  202. examples/workloads/three_matmuls_annotated.yaml +59 -0
  203. notebooks/.ipynb_checkpoints/fastfusion_arch_study_michael-checkpoint.ipynb +359 -0
  204. notebooks/compute_in_memory/_scripts.py +339 -0
  205. notebooks/compute_in_memory/isaac.guide.ipynb +270 -0
  206. notebooks/compute_in_memory/wang_vlsi_2022.ipynb +602 -0
  207. notebooks/paths.py +4 -0
  208. notebooks/tutorials/.ipynb_checkpoints/1_FFM-checkpoint.ipynb +3110 -0
  209. notebooks/tutorials/FFM.ipynb +3498 -0
  210. notebooks/tutorials/_include.py +48 -0
  211. notebooks/tutorials/component_energy_area.ipynb +363 -0
  212. tests/Q_mapping.yaml +38 -0
  213. tests/__init__.py +0 -0
  214. tests/conv.mapping.yaml +27 -0
  215. tests/conv.workload.yaml +13 -0
  216. tests/conv_sym.mapping.yaml +43 -0
  217. tests/copy.mapping.yaml +35 -0
  218. tests/copy.workload.yaml +15 -0
  219. tests/distribuffers/__init__.py +0 -0
  220. tests/distribuffers/multicast/test_cases.yaml +482 -0
  221. tests/distribuffers/spec/binding/valid_bindings.yaml +97 -0
  222. tests/distribuffers/spec/distributed.yaml +100 -0
  223. tests/distribuffers/spec/logical_arch.yaml +32 -0
  224. tests/distribuffers/spec/physical_arch.yaml +69 -0
  225. tests/distribuffers/test_binding.py +48 -0
  226. tests/frontend/__init__.py +0 -0
  227. tests/frontend/test_mapping_viz.py +52 -0
  228. tests/mapper/__init__.py +0 -0
  229. tests/mapper/configs/conv1d/conv1d.mapping.yaml +31 -0
  230. tests/mapper/configs/conv1d/conv1d.workload.yaml +11 -0
  231. tests/mapper/configs/two_conv1d/two_conv1d.expected.yaml +38 -0
  232. tests/mapper/configs/two_conv1d/two_conv1d.mapping.yaml +54 -0
  233. tests/mapper/configs/two_conv1d/two_conv1d.workload.yaml +19 -0
  234. tests/mapper/test_mapping_to_isl.py +90 -0
  235. tests/mapper/test_spatial_reuse_analysis.py +67 -0
  236. tests/mapper/test_temporal_reuse_analysis.py +56 -0
  237. tests/mapper/util.py +58 -0
  238. tests/matmul.mapping.yaml +29 -0
  239. tests/matmul.workload.yaml +12 -0
  240. tests/matmul_spatial.mapping.yaml +44 -0
  241. tests/mha.renames.yaml +65 -0
  242. tests/mha.workload.yaml +67 -0
  243. tests/mha.yaml +59 -0
  244. tests/mha_full.workload.yaml +67 -0
  245. tests/mobilenet.workload.yaml +35 -0
  246. tests/mobilenet_long.workload.yaml +64 -0
  247. tests/pmappingcache.py +24 -0
  248. tests/processing_stage.arch.yaml +40 -0
  249. tests/snowcat.arch.yaml +36 -0
  250. tests/test_ffm_join_pmappings.py +106 -0
  251. tests/test_ffm_make_pmappings.py +82 -0
  252. tests/test_ffm_make_tile_shapes.py +49 -0
  253. tests/test_mapper.py +100 -0
  254. tests/test_model.py +37 -0
  255. tests/test_plotting.py +72 -0
  256. tests/test_processing_stage.py +46 -0
  257. tests/test_symbolic_model.py +248 -0
  258. tests/test_workload.py +141 -0
@@ -0,0 +1,407 @@
1
+ import copy
2
+ import math
3
+ import pandas as pd
4
+ from uuid import UUID
5
+ from collections import defaultdict
6
+
7
+ import sympy
8
+ from accelforge.frontend.mapping import Loop, Mapping, Spatial, Temporal
9
+ from accelforge.frontend.workload import EinsumName
10
+ from accelforge.mapper.FFM._join_pmappings.compatibility import (
11
+ Compatibility,
12
+ )
13
+ from accelforge.mapper.FFM._join_pmappings.pmapping_dataframe import (
14
+ MAPPING_COLUMN,
15
+ PmappingDataframe,
16
+ col2nameloop,
17
+ col_used_in_pareto,
18
+ is_reservation_col,
19
+ makepareto,
20
+ tensor2col,
21
+ col2nameloop,
22
+ is_reservation_col,
23
+ nameloop2col,
24
+ )
25
+
26
+ from accelforge.frontend.mapper.metrics import Metrics
27
+ from accelforge.mapper.FFM._make_pmappings.make_pmappings_from_templates.make_tile_shapes import (
28
+ make_tile_shapes,
29
+ IMPERFECT,
30
+ )
31
+ from accelforge.mapper.FFM._join_pmappings.pmapping_group import PmappingGroup
32
+ from accelforge.mapper.FFM._make_pmappings.pmapper_job import (
33
+ Job,
34
+ SameCompatibilityJobs,
35
+ )
36
+ from accelforge.mapper.FFM._pareto_df.df_convention import (
37
+ is_fused_loop_col,
38
+ is_n_iterations_col,
39
+ )
40
+ from accelforge.util._mathfuncs import _count_factorizations
41
+
42
+
43
def shift_reservations_by_null_loop_indices(
    mappings: pd.DataFrame, null_loop_indices: set[int]
):
    """Renumber reservation columns after some loop indices have been removed.

    Every reservation column (as recognised by ``is_reservation_col``) decodes
    to a ``(name, above)`` pair via ``col2nameloop``. ``above`` is lowered by
    the number of entries in ``null_loop_indices`` that are strictly smaller
    than it. When several columns of the same name land on the same new index,
    only the one with the largest original ``above`` is kept; the others are
    dropped.

    Args:
        mappings: DataFrame to update. It is modified **in place**.
        null_loop_indices: Loop indices that are being removed.

    Returns:
        The same ``mappings`` object, for convenience.

    Raises:
        ValueError: If the column labels are not unique after renaming.
    """
    # target column label -> (name, original above) of the column that wins it.
    target2source = {}
    dropcols = []
    for c in mappings.columns:
        if not is_reservation_col(c):
            continue
        name, above = col2nameloop(c)
        new_above = above - sum(above > i for i in null_loop_indices)
        target = nameloop2col(name, new_above)
        if target not in target2source:
            target2source[target] = (name, above)
        elif above > target2source[target][1]:
            # The current column supersedes the previously chosen one.
            dropcols.append(nameloop2col(*target2source[target]))
            target2source[target] = (name, above)
        else:
            dropcols.append(c)

    mappings.drop(columns=dropcols, inplace=True)
    renames = {
        nameloop2col(name, above): target
        for target, (name, above) in target2source.items()
    }
    mappings.rename(columns=renames, inplace=True)

    # Previously this path re-ran the function on a deep copy of the input
    # before raising, which recursed without bound on the very same data and
    # hid the ValueError behind a RecursionError. Raise directly instead.
    if not mappings.columns.is_unique:
        raise ValueError(f"Duplicate columns: {mappings.columns}")
    return mappings
74
+
75
+
76
def get_equivalent_pmappings(
    pmapping_group: PmappingGroup, reservation_levels: set[int]
) -> list[PmappingGroup]:
    """Build one data-less ``PmappingGroup`` per equivalent permutation.

    The permutations come from
    ``pmapping_group.compatibility.make_equivalent_permutations``; each is
    wrapped in a ``PmappingGroup`` whose second argument is ``None``.
    """
    compatibility = pmapping_group.compatibility
    permutations = compatibility.make_equivalent_permutations(reservation_levels)
    equivalents = []
    for permuted in permutations:
        equivalents.append(PmappingGroup(permuted, None))
    return equivalents
84
+
85
+
86
def mapping2fused_loop_cols(mapping: Mapping, einsum_name: EinsumName):
    """Collect the tile-shape entries of every fused loop in ``mapping``.

    For each ``Loop`` node whose ``_fused`` flag is truthy, ``tile_shape`` is
    taken when it is set; otherwise the ``tile_shape`` and
    ``initial_tile_shape`` of its ``tile_pattern`` are taken. String entries
    are prefixed with ``"{einsum_name}<SEP>"``; non-string entries are returned
    unchanged.

    Raises:
        ValueError: If a fused loop has neither a tile shape nor a tile pattern.
    """
    fused_loops = [
        node for node in mapping.nodes if isinstance(node, Loop) and node._fused
    ]

    entries = []
    for fused in fused_loops:
        if fused.tile_shape is not None:
            entries.append(fused.tile_shape)
            continue
        if fused.tile_pattern is None:
            raise ValueError(f"Can't find tile shape or tile pattern for loop {fused}")
        entries.extend(
            (fused.tile_pattern.tile_shape, fused.tile_pattern.initial_tile_shape)
        )

    qualified = []
    for entry in entries:
        if isinstance(entry, str):
            qualified.append(f"{einsum_name}<SEP>{entry}")
        else:
            qualified.append(entry)
    return qualified
97
+
98
+
99
def get_fused_loop_indices(
    df: pd.DataFrame,
    compatibility: Compatibility,
    einsum_name: EinsumName,
    return_as_int: bool = False,
) -> pd.Series | int | list[bool]:
    """Report, per loop of ``compatibility``, whether its iteration count != 1.

    For each loop, ``tile_pattern.calculated_n_iterations`` is read. A ``str``
    or ``sympy.Symbol`` is looked up as the ``"{einsum_name}<SEP>{name}"``
    column of ``df``; any other value is compared as-is.

    Args:
        df: DataFrame holding the per-pmapping iteration-count columns.
        compatibility: Object exposing ``loops``.
        einsum_name: Prefix used to build the column labels.
        return_as_int: Selects the return form (see below).

    Returns:
        If ``return_as_int`` is true: the per-loop flags folded into a binary
        code (first loop is the most significant bit). This is a Series when
        any flag came from a column, else a plain ``int``.
        Otherwise: a list with one ``bool`` per loop.

    Raises:
        ValueError: In list mode, if a flag differs between rows of ``df``.
    """
    flags = []
    for loop in compatibility.loops:
        col = loop.tile_pattern.calculated_n_iterations
        assert col is not None, f"Loop {loop} has no calculated n_iterations"
        if isinstance(col, str):
            col = df[f"{einsum_name}<SEP>{col}"]
        elif isinstance(col, sympy.Symbol):
            col = df[f"{einsum_name}<SEP>{col.name}"]
        flags.append(col != 1)

    if return_as_int:
        code = 0
        for flag in flags:
            code = code * 2 + flag
        return code

    per_loop = []
    for flag in flags:
        # A flag is a Series only when it was read from a column. Values that
        # were not column names compare to a plain bool, which has no
        # ``unique``/``iloc`` and previously crashed this branch.
        if isinstance(flag, pd.Series):
            if len(flag.unique()) > 1:
                raise ValueError("This won't work if there's more than one")
            flag = flag.iloc[0]
        per_loop.append(bool(flag))
    return per_loop
129
+
130
+
131
def _count_loops(
    job: Job,
) -> tuple[list[int], list[int], dict[str, int], dict[str, int]]:
    """Count runs of consecutive loop nodes in ``job.mapping.nodes``.

    Returns a 4-tuple:
        temporal_n_loops: length of each run closed while no spatial dimension
            was active.
        spatial_n_loops: length of each run closed while a spatial dimension
            (``Spatial.name``) was active.
        rv_spatial_count: per ``rank_variable``, number of ``Spatial`` nodes.
        rv_temporal_count: per ``rank_variable``, number of ``Temporal`` nodes
            that are also ``Loop`` instances.

    NOTE(review): the indentation of this function was reconstructed; the
    final ``else`` is assumed to pair with ``isinstance(node, Loop)`` (a
    non-loop node ends the current run) -- confirm against the original file.
    """
    nodes = job.mapping.nodes
    temporal_n_loops = []
    spatial_n_loops = []
    rv_spatial_count = defaultdict(int)
    rv_temporal_count = defaultdict(int)
    cur_n_loops = 0  # Length of the run currently being accumulated
    spatial_dim = None  # Name of the spatial dimension of the current run, if any

    def pop_loop():
        # Close the current run: record its length (if non-empty) in the list
        # matching the active spatial dimension, then reset the counter.
        nonlocal cur_n_loops
        if cur_n_loops >= 1:
            if spatial_dim is not None:
                spatial_n_loops.append(cur_n_loops)
            else:
                temporal_n_loops.append(cur_n_loops)
        cur_n_loops = 0

    for node in nodes:
        cur_spatial_dim = None
        if isinstance(node, Spatial):
            cur_spatial_dim = node.name
            rv_spatial_count[node.rank_variable] += 1
        # A change of spatial dimension (including to/from None) ends the run.
        # pop_loop() still sees the *old* spatial_dim here.
        if cur_spatial_dim != spatial_dim:
            pop_loop()
            spatial_dim = cur_spatial_dim
        if isinstance(node, Loop):
            cur_n_loops += 1
            if isinstance(node, Temporal):
                rv_temporal_count[node.rank_variable] += 1
        else:
            pop_loop()
    # Flush the trailing run, if any.
    pop_loop()
    return temporal_n_loops, spatial_n_loops, rv_spatial_count, rv_temporal_count
165
+
166
+
167
def multiply_n_pmappings_by_permutations(n_pmappings: int, job: Job) -> int:
    """Compute a mapspace-size count for ``job`` from its loop structure.

    The count is the product, over rank variables, of
    ``_count_factorizations(bound, n_loops_for_that_variable)``. It is
    adjusted by the substrings present in
    ``job.spec.mapper.ffm._count_option_for_mapsapce_size_evaluation``:

    * ``"non_helpful_tile_shapes"``: every rank variable is given one temporal
      loop per temporal loop group.
    * ``"non_helpful_loops_for_loop_orders"``: every temporal loop group is
      sized as if it had one loop per rank variable.
    * ``"redundant_loop_orders"``: the count is scaled by the number of
      temporal loop orders divided by ``job.mapping._n_loop_orders`` (the
      orders that are evaluated, so they are not double counted).

    ``n_pmappings`` is currently not used in the computation. Because of the
    division, the result is a float when ``"redundant_loop_orders"`` applies.
    """
    option = job.spec.mapper.ffm._count_option_for_mapsapce_size_evaluation

    counted = _count_loops(job)
    temporal_groups, spatial_groups, spatial_per_rv, temporal_per_rv = counted

    bounds = dict(job.rank_variable_bounds.items())

    if "non_helpful_tile_shapes" in option:
        temporal_per_rv = dict.fromkeys(bounds, len(temporal_groups))

    if "non_helpful_loops_for_loop_orders" in option:
        for idx, _ in enumerate(temporal_groups):
            temporal_groups[idx] = len(bounds)

    # Number of tile shapes: factorizations of each bound across its loops.
    loops_per_rv = {}
    for rank_var in bounds:
        loops_per_rv[rank_var] = spatial_per_rv[rank_var] + temporal_per_rv[rank_var]
    n_factorizations = math.prod(
        _count_factorizations(bound, loops_per_rv[rank_var], imperfect=IMPERFECT)
        for rank_var, bound in bounds.items()
    )
    n_temporal_loop_orders = math.prod(
        math.factorial(group_size) for group_size in temporal_groups
    )

    total = n_factorizations
    if "redundant_loop_orders" in option:
        # job.mapping._n_loop_orders permutations are actually evaluated;
        # divide them out so they are not counted twice.
        total = total * (n_temporal_loop_orders / job.mapping._n_loop_orders)
    return total
205
+
206
+
207
def assert_all_jobs_have_same_symbols(
    jobs_with_similar_compatibilities: SameCompatibilityJobs,
):
    """Assert that loop position ``i`` uses one n_iterations symbol everywhere.

    For every job, every tensor of its compatibility, and every loop position,
    the loop's ``tile_pattern.calculated_n_iterations`` is gathered. The
    assertion fails if any position has more than one distinct value.
    """
    symbols_at_position = {}
    for job in jobs_with_similar_compatibilities:
        for tensor in job.compatibility.tensors:
            for position, loop in enumerate(tensor.loops):
                seen = symbols_at_position.setdefault(position, set())
                seen.add(loop.tile_pattern.calculated_n_iterations)

    consistent = True
    for seen in symbols_at_position.values():
        if len(seen) != 1:
            consistent = False
            break
    assert (
        consistent
    ), "All jobs must have the same symbols for compatibility n_iterations"
220
+
221
+
222
def make_pmappings_from_templates(
    jobs_with_similar_compatibilities: SameCompatibilityJobs,
) -> tuple[EinsumName, list[PmappingGroup], dict[UUID, Mapping], SameCompatibilityJobs]:
    """Generate, Pareto-prune and group the pmappings of a set of jobs.

    Steps, in order:
      1. Run ``make_tile_shapes`` on every job and tag each resulting frame
         with the job's id in ``MAPPING_COLUMN``.
      2. Concatenate the frames through ``PmappingDataframe`` and prefix
         per-Einsum columns with ``"{einsum_name}<SEP>"``.
      3. Pareto-prune with ``makepareto``, split by the fused-loop columns.
      4. Group rows by which loops have an iteration count != 1, drop the
         remaining ("null") loops from the compatibility, and build one
         ``PmappingGroup`` per group.

    Returns:
        ``(einsum_name, pmapping_groups, pmapping_objects, jobs)`` where
        ``pmapping_objects`` maps the ``job_id`` of each job that survived the
        Pareto prune to its ``mapping`` and ``jobs`` is the input argument.
        If the concatenated frame is empty, the groups and objects are empty.

    Side effects: each job's ``compatibility`` and ``n_total_pmappings`` are
    reassigned.

    NOTE(review): the indentation of this function was reconstructed from
    de-indented text; see the inline notes where the nesting is a judgement
    call.
    """
    jwsc = jobs_with_similar_compatibilities

    results = []

    for job in jobs_with_similar_compatibilities:
        try:
            # tensor2mapping is not used further in this function.
            result, tensor2mapping = make_tile_shapes(job)
        except Exception as e:
            # Attach context and re-raise the original exception unchanged.
            e.add_note(f"Einsum {jwsc.einsum_name} compatibility {job.compatibility}")
            raise
        job.compatibility = job.compatibility.populate_loops()

        # Ctrl-F for CONTIGUOUS_ITERATION_SPACE_DISCUSSION TODO: Turn tensor2pmapping
        # into per-tensor compatibility

        # This changes the pmapping count to include superfluous permutations
        # TODO: Add a multiplier for the permutations that we include in the fusion
        # piece, which are NOT known to be superfluous

        # prev = job.spec.mapper.ffm._count_option_for_mapsapce_size_evaluation
        # job.spec.mapper.ffm._count_option_for_mapsapce_size_evaluation = "redundant_loop_orders_and_irrelevant_loops"
        # a = multiply_n_pmappings_by_permutations(job.n_total_pmappings, job)
        # job.spec.mapper.ffm._count_option_for_mapsapce_size_evaluation = "redundant_loop_orders"
        # b = multiply_n_pmappings_by_permutations(job.n_total_pmappings, job)

        # if a < b:
        # job.spec.mapper.ffm._count_option_for_mapsapce_size_evaluation = "redundant_loop_orders_and_irrelevant_loops"
        # a = multiply_n_pmappings_by_permutations(job.n_total_pmappings, job)
        # job.spec.mapper.ffm._count_option_for_mapsapce_size_evaluation = "redundant_loop_orders"
        # b = multiply_n_pmappings_by_permutations(job.n_total_pmappings, job)
        # assert False

        # job.spec.mapper.ffm._count_option_for_mapsapce_size_evaluation = prev
        job.n_total_pmappings = multiply_n_pmappings_by_permutations(
            job.n_total_pmappings, job
        )

        # Tag every row with the job that produced it.
        result[MAPPING_COLUMN] = job.job_id
        # Drop reservation columns of resources listed in
        # job.memories_track_pmappings_only.
        cols_to_drop = []
        for col in result.columns:
            if is_reservation_col(col):
                resource = col2nameloop(col)[0]
                if resource in job.memories_track_pmappings_only:
                    cols_to_drop.append(col)
                # NOTE(review): assumed to apply to every reservation column,
                # not only the dropped ones -- confirm the original nesting.
                assert resource not in job.ignored_resources, "Should have been ignored"
        result.drop(columns=cols_to_drop, inplace=True)
        results.append(result)

    fusable_tensors = jwsc.fusable_tensors
    einsum_name = jwsc.einsum_name
    metrics = jwsc.metrics
    # NOTE(review): computed but not used below; both PmappingDataframe
    # constructions pass a literal False instead.
    limit_capacity_drop_valid_reservations = not (Metrics.RESOURCE_USAGE & metrics)
    compatibility = jwsc.compatibility

    # Creating a PmappingDataframe fills in reservation columns since different pmappings
    # have different ones.
    next_shared_loop_index = compatibility.n_loops - 1
    df = PmappingDataframe.concat(
        [
            PmappingDataframe(
                r,
                skip_pareto=True,
                next_shared_loop_index=next_shared_loop_index,
                n_total_pmappings=1,  # Unused for now, just making an initial Pareto
                n_valid_pmappings=1,  # Unused for now, just making an initial Pareto
                # `job` here is the last job left over from the loop above.
                ignored_resources=job.ignored_resources,
                # False because we may have lifetimes that stretch through this Einsum
                # due to data dependencies, not loops
                limit_capacity_drop_valid_reservations=False,
            )
            for r in results
        ],
        skip_pareto=True,
    ).data
    if df.empty:
        return einsum_name, [], {}, jobs_with_similar_compatibilities

    # Prefix every column with the Einsum name, except columns used in the
    # Pareto comparison and the fusable-tensor columns.
    tensor_cols = [tensor2col(tensor) for tensor in fusable_tensors]
    df.columns = [
        c if col_used_in_pareto(c) or c in tensor_cols else f"{einsum_name}<SEP>{c}"
        for c in df.columns
    ]

    # Compatibility symbols that are not n_iterations columns, in prefixed form.
    fused_loop_cols = [
        f"{einsum_name}<SEP>{c}"
        for c in compatibility.symbols()
        if not is_n_iterations_col(c)
    ]

    job0 = next(iter(jobs_with_similar_compatibilities))

    # Pareto prune
    df = makepareto(df, split_by_cols=fused_loop_cols).copy()

    # Keep the mapping object of every job that still has at least one row.
    jobs_passed_pareto = sorted(df[f"{einsum_name}<SEP>{MAPPING_COLUMN}"].unique())
    pmapping_objects = {
        job.job_id: job.mapping
        for job in jobs_with_similar_compatibilities
        if job.job_id in jobs_passed_pareto
    }

    assert_all_jobs_have_same_symbols(jobs_with_similar_compatibilities)
    # Otherwise, following logic fails

    # Per-row binary code of which loops have n_iterations != 1; rows sharing
    # a code form one group.
    df["fused_loop_indices"] = get_fused_loop_indices(
        df, job0.compatibility, einsum_name, return_as_int=True
    )
    groups = list(df.groupby(["fused_loop_indices"]))
    # The job totals are split evenly across the groups.
    total_pmappings_per_group = sum(
        j.n_total_pmappings for j in jobs_with_similar_compatibilities
    ) / len(groups)
    valid_pmappings_per_group = sum(
        j.n_valid_pmappings for j in jobs_with_similar_compatibilities
    ) / len(groups)

    pmapping_groups = []
    for _, mappings in groups:
        compatibility = jwsc.compatibility
        fused_loop_indices = []

        # Indices of the loops whose n_iterations != 1 within this group.
        for i, f in enumerate(
            get_fused_loop_indices(
                mappings, compatibility, einsum_name, return_as_int=False
            )
        ):
            if f:
                fused_loop_indices.append(i)

        # All remaining loop indices are removed from the compatibility.
        null_loop_indices = tuple(
            i for i in range(compatibility.n_loops) if i not in fused_loop_indices
        )

        dropcols = ["fused_loop_indices"]
        mappings = mappings.drop(columns=dropcols)

        compatibility = compatibility.drop_loop_indices(null_loop_indices)

        symbol_renames, compatibility = compatibility.make_fused_loop_symbols(
            einsum_name
        )
        # Copy each renamed symbol's prefixed column to its new column name.
        for k, v in symbol_renames.items():
            mappings[v] = mappings[f"{einsum_name}<SEP>{k}"]
        # Mutates `mappings` in place.
        shift_reservations_by_null_loop_indices(mappings, null_loop_indices)

        # Drop fused-loop columns the updated compatibility no longer refers to.
        symbols = compatibility.symbols()
        dropcols = [
            c for c in mappings.columns if is_fused_loop_col(c) and c not in symbols
        ]
        mappings = mappings.drop(columns=dropcols)

        # Sanity check: no total-energy column may be negative. On failure,
        # the error message dumps every offending row.
        energy_cols = [c for c in mappings.columns if "Total<SEP>energy" in c]
        if (mappings[energy_cols] < 0).any(axis=None):
            mapping_with_negative_energy = mappings[
                (mappings[energy_cols] < 0).any(axis=1)
            ]
            msg = ""
            for _, row in mapping_with_negative_energy.iterrows():
                for k, v in row.items():
                    msg += f"{k}: {v}\n"
                msg += "\n"
            raise RuntimeError(f"negative energy:\n{msg}")

        # Skip pareto because we already did it above
        # (skipped only when no loops were dropped, i.e. the shared loop index
        # is unchanged for this group).
        next_shared_loop_index_this_group = compatibility.n_loops - 1
        partial_mappings = PmappingDataframe(
            mappings,
            next_shared_loop_index=next_shared_loop_index_this_group,
            n_total_pmappings=total_pmappings_per_group,
            n_valid_pmappings=valid_pmappings_per_group,
            skip_pareto=next_shared_loop_index_this_group == next_shared_loop_index,
            # `job` is still the last job from the first loop of this function.
            ignored_resources=job.ignored_resources,
            # False because we may have lifetimes that stretch through this Einsum
            # due to data dependencies, not loops
            limit_capacity_drop_valid_reservations=False,
        )
        pmapping_groups.append(PmappingGroup(compatibility, partial_mappings))

    return (
        einsum_name,
        pmapping_groups,
        pmapping_objects,
        jobs_with_similar_compatibilities,
    )