accelforge-0.0.1-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (258)
  1. accelforge/__init__.py +21 -0
  2. accelforge/_accelerated_imports.py +16 -0
  3. accelforge/_deprecate/_simanneal/evalmapping.py +271 -0
  4. accelforge/_deprecate/_simanneal/mapspaceglobals.py +298 -0
  5. accelforge/_deprecate/_simanneal/simanneal.py +666 -0
  6. accelforge/_deprecate/_simanneal/tracking.py +105 -0
  7. accelforge/_deprecate/_simanneal/wrappers.py +218 -0
  8. accelforge/_deprecate/_simanneal2/__init__.py +7 -0
  9. accelforge/_deprecate/_simanneal2/simanneal.py +493 -0
  10. accelforge/_deprecate/_simanneal2/tracking.py +116 -0
  11. accelforge/_deprecate/compatibility_util.py +181 -0
  12. accelforge/_deprecate/layerdeduplication/__init__.py +2 -0
  13. accelforge/_deprecate/layerdeduplication/group_similar_einsums.py +160 -0
  14. accelforge/_deprecate/layerdeduplication/grouped_einsums.py +84 -0
  15. accelforge/_deprecate/mapping_filter_tags/__init__.py +2 -0
  16. accelforge/_deprecate/mapping_filter_tags/ffmt.py +212 -0
  17. accelforge/_deprecate/mapping_filter_tags/onesplit.py +24 -0
  18. accelforge/_deprecate/mapping_filter_tags/util.py +24 -0
  19. accelforge/_deprecate/tags.py +69 -0
  20. accelforge/_deprecate/viz/__init__.py +0 -0
  21. accelforge/_deprecate/viz/interactive.py +159 -0
  22. accelforge/_deprecate/viz/reservationtree.py +307 -0
  23. accelforge/_deprecate/viz/ski_slope.py +88 -0
  24. accelforge/_version.py +15 -0
  25. accelforge/examples.py +39 -0
  26. accelforge/frontend/__init__.py +10 -0
  27. accelforge/frontend/_binding.py +129 -0
  28. accelforge/frontend/_workload_isl/__init__.py +2 -0
  29. accelforge/frontend/_workload_isl/_isl.py +149 -0
  30. accelforge/frontend/_workload_isl/_symbolic.py +141 -0
  31. accelforge/frontend/arch copy.py +1544 -0
  32. accelforge/frontend/arch.py +1642 -0
  33. accelforge/frontend/config.py +63 -0
  34. accelforge/frontend/mapper/__init__.py +5 -0
  35. accelforge/frontend/mapper/ffm.py +126 -0
  36. accelforge/frontend/mapper/mapper.py +7 -0
  37. accelforge/frontend/mapper/metrics.py +30 -0
  38. accelforge/frontend/mapping/__init__.py +1 -0
  39. accelforge/frontend/mapping/mapping.py +1736 -0
  40. accelforge/frontend/model.py +14 -0
  41. accelforge/frontend/renames.py +150 -0
  42. accelforge/frontend/spec copy.py +230 -0
  43. accelforge/frontend/spec.py +301 -0
  44. accelforge/frontend/variables.py +12 -0
  45. accelforge/frontend/workload.py +952 -0
  46. accelforge/mapper/FFM/__init__.py +9 -0
  47. accelforge/mapper/FFM/_join_pmappings/__init__.py +0 -0
  48. accelforge/mapper/FFM/_join_pmappings/compatibility.py +653 -0
  49. accelforge/mapper/FFM/_join_pmappings/compress_pmappings.py +140 -0
  50. accelforge/mapper/FFM/_join_pmappings/join_pmappings.py +703 -0
  51. accelforge/mapper/FFM/_join_pmappings/pmapping_dataframe.py +901 -0
  52. accelforge/mapper/FFM/_join_pmappings/pmapping_group.py +337 -0
  53. accelforge/mapper/FFM/_make_pmappings/contraints/__init__.py +0 -0
  54. accelforge/mapper/FFM/_make_pmappings/contraints/constraints.py +360 -0
  55. accelforge/mapper/FFM/_make_pmappings/make_pmapping_templates/__init__.py +1 -0
  56. accelforge/mapper/FFM/_make_pmappings/make_pmapping_templates/make_loops.py +373 -0
  57. accelforge/mapper/FFM/_make_pmappings/make_pmapping_templates/make_pmapping_templates.py +463 -0
  58. accelforge/mapper/FFM/_make_pmappings/make_pmapping_templates/make_reservations.py +95 -0
  59. accelforge/mapper/FFM/_make_pmappings/make_pmapping_templates/make_storage_order.py +382 -0
  60. accelforge/mapper/FFM/_make_pmappings/make_pmapping_templates/make_storages.py +155 -0
  61. accelforge/mapper/FFM/_make_pmappings/make_pmappings.py +411 -0
  62. accelforge/mapper/FFM/_make_pmappings/make_pmappings_from_templates/__init__.py +1 -0
  63. accelforge/mapper/FFM/_make_pmappings/make_pmappings_from_templates/make_pmappings_from_templates.py +407 -0
  64. accelforge/mapper/FFM/_make_pmappings/make_pmappings_from_templates/make_tile_shapes.py +1681 -0
  65. accelforge/mapper/FFM/_make_pmappings/make_pmappings_from_templates/run_model.py +170 -0
  66. accelforge/mapper/FFM/_make_pmappings/make_pmappings_from_templates/symbol_relations.py +174 -0
  67. accelforge/mapper/FFM/_make_pmappings/pmapper_job.py +282 -0
  68. accelforge/mapper/FFM/_pareto_df/df_convention.py +273 -0
  69. accelforge/mapper/FFM/_pareto_df/pareto copy.py +836 -0
  70. accelforge/mapper/FFM/_pareto_df/pareto.py +508 -0
  71. accelforge/mapper/FFM/data.py +61 -0
  72. accelforge/mapper/FFM/main copy.py +236 -0
  73. accelforge/mapper/FFM/main.py +208 -0
  74. accelforge/mapper/FFM/mappings.py +510 -0
  75. accelforge/mapper/FFM/pmappings.py +310 -0
  76. accelforge/mapper/__init__.py +4 -0
  77. accelforge/mapper.py +0 -0
  78. accelforge/model/__init__.py +1 -0
  79. accelforge/model/_looptree/__init__.py +0 -0
  80. accelforge/model/_looptree/accesses.py +335 -0
  81. accelforge/model/_looptree/capacity/__init__.py +1 -0
  82. accelforge/model/_looptree/capacity/aggregators.py +36 -0
  83. accelforge/model/_looptree/capacity/capacity.py +47 -0
  84. accelforge/model/_looptree/energy.py +150 -0
  85. accelforge/model/_looptree/equivalent_ranks.py +29 -0
  86. accelforge/model/_looptree/latency/__init__.py +1 -0
  87. accelforge/model/_looptree/latency/latency.py +98 -0
  88. accelforge/model/_looptree/latency/memory.py +120 -0
  89. accelforge/model/_looptree/latency/processors.py +92 -0
  90. accelforge/model/_looptree/mapping_utilities.py +71 -0
  91. accelforge/model/_looptree/reuse/__init__.py +4 -0
  92. accelforge/model/_looptree/reuse/isl/__init__.py +1 -0
  93. accelforge/model/_looptree/reuse/isl/des.py +59 -0
  94. accelforge/model/_looptree/reuse/isl/isl_functions.py +374 -0
  95. accelforge/model/_looptree/reuse/isl/mapping_to_isl/__init__.py +4 -0
  96. accelforge/model/_looptree/reuse/isl/mapping_to_isl/analyze_mapping.py +297 -0
  97. accelforge/model/_looptree/reuse/isl/mapping_to_isl/skews_from_mapping.py +236 -0
  98. accelforge/model/_looptree/reuse/isl/mapping_to_isl/tiling.py +685 -0
  99. accelforge/model/_looptree/reuse/isl/mapping_to_isl/types.py +188 -0
  100. accelforge/model/_looptree/reuse/isl/spatial.py +260 -0
  101. accelforge/model/_looptree/reuse/isl/temporal.py +182 -0
  102. accelforge/model/_looptree/reuse/symbolic/__init__.py +1 -0
  103. accelforge/model/_looptree/reuse/symbolic/symbolic copy 2.py +1346 -0
  104. accelforge/model/_looptree/reuse/symbolic/symbolic copy.py +1408 -0
  105. accelforge/model/_looptree/reuse/symbolic/symbolic.py +1396 -0
  106. accelforge/model/_looptree/run.py +122 -0
  107. accelforge/model/_looptree/types.py +26 -0
  108. accelforge/model/_looptree/visualization/__init__.py +0 -0
  109. accelforge/model/_looptree/visualization/occupancy.py +11 -0
  110. accelforge/model/main.py +222 -0
  111. accelforge/plotting/__init__.py +2 -0
  112. accelforge/plotting/mappings.py +219 -0
  113. accelforge/plotting/specs.py +57 -0
  114. accelforge/util/__init__.py +4 -0
  115. accelforge/util/_base_analysis_types.py +24 -0
  116. accelforge/util/_basetypes.py +1089 -0
  117. accelforge/util/_frozenset.py +36 -0
  118. accelforge/util/_isl.py +29 -0
  119. accelforge/util/_itertools.py +14 -0
  120. accelforge/util/_mathfuncs.py +57 -0
  121. accelforge/util/_parse_expressions.py +339 -0
  122. accelforge/util/_picklecache.py +32 -0
  123. accelforge/util/_setexpressions.py +268 -0
  124. accelforge/util/_sympy/__init__.py +0 -0
  125. accelforge/util/_sympy/broadcast_max.py +18 -0
  126. accelforge/util/_visualization.py +112 -0
  127. accelforge/util/_yaml.py +579 -0
  128. accelforge/util/parallel.py +193 -0
  129. accelforge-0.0.1.dist-info/METADATA +64 -0
  130. accelforge-0.0.1.dist-info/RECORD +258 -0
  131. accelforge-0.0.1.dist-info/WHEEL +5 -0
  132. accelforge-0.0.1.dist-info/licenses/LICENSE +19 -0
  133. accelforge-0.0.1.dist-info/top_level.txt +5 -0
  134. docs/_build/html/_sources/fastfusion.frontend.mapper.rst.txt +37 -0
  135. docs/_build/html/_sources/fastfusion.frontend.rst.txt +70 -0
  136. docs/_build/html/_sources/fastfusion.frontend.workload.rst.txt +21 -0
  137. docs/_build/html/_sources/fastfusion.mapper.FFM.rst.txt +37 -0
  138. docs/_build/html/_sources/fastfusion.mapper.rst.txt +18 -0
  139. docs/_build/html/_sources/fastfusion.rst.txt +20 -0
  140. docs/_build/html/_sources/fastfusion.util.rst.txt +21 -0
  141. docs/_build/html/_sources/index.rst.txt +87 -0
  142. docs/_build/html/_sources/modules.rst.txt +7 -0
  143. docs/_build/html/_sources/notes/citation.rst.txt +45 -0
  144. docs/_build/html/_sources/notes/definitions.rst.txt +43 -0
  145. docs/_build/html/_sources/notes/faqs.rst.txt +39 -0
  146. docs/_build/html/_sources/notes/modeling/accelerator_energy_latency.rst.txt +72 -0
  147. docs/_build/html/_sources/notes/modeling/component_energy_area.rst.txt +96 -0
  148. docs/_build/html/_sources/notes/modeling/mapping.rst.txt +100 -0
  149. docs/_build/html/_sources/notes/modeling.rst.txt +33 -0
  150. docs/_build/html/_sources/notes/parsing/arithmetic_parsing.rst.txt +136 -0
  151. docs/_build/html/_sources/notes/parsing/setexpressions.rst.txt +63 -0
  152. docs/_build/html/_sources/notes/parsing/yaml_parsing.rst.txt +176 -0
  153. docs/_build/html/_sources/notes/quickstart_and_installation.rst.txt +9 -0
  154. docs/_build/html/_sources/notes/spec/architecture.rst.txt +133 -0
  155. docs/_build/html/_sources/notes/spec/mapping.rst.txt +12 -0
  156. docs/_build/html/_sources/notes/spec/workload.rst.txt +83 -0
  157. docs/_build/html/_sources/notes/spec.rst.txt +36 -0
  158. docs/source/_ext/include_attrs.py +213 -0
  159. docs/source/_ext/include_docstring.py +364 -0
  160. docs/source/_ext/include_functions.py +154 -0
  161. docs/source/_ext/include_notebook.py +131 -0
  162. docs/source/_ext/include_yaml.py +119 -0
  163. docs/source/_ext/inherited_attributes.py +222 -0
  164. docs/source/_ext/paths.py +4 -0
  165. docs/source/conf.py +79 -0
  166. examples/arches/compute_in_memory/_include.yaml +74 -0
  167. examples/arches/compute_in_memory/_include_functions.py +229 -0
  168. examples/arches/compute_in_memory/_load_spec.py +57 -0
  169. examples/arches/compute_in_memory/components/c2c_multiplier.py +181 -0
  170. examples/arches/compute_in_memory/components/dac_c2c_r2r.py +605 -0
  171. examples/arches/compute_in_memory/components/misc.py +195 -0
  172. examples/arches/compute_in_memory/components/util/bit_functions.py +51 -0
  173. examples/arches/compute_in_memory/components/zero_comparator.py +92 -0
  174. examples/arches/compute_in_memory/isaac.yaml +233 -0
  175. examples/arches/compute_in_memory/memory_cells/ecram_demo.yaml +63 -0
  176. examples/arches/compute_in_memory/memory_cells/rram_example.yaml +63 -0
  177. examples/arches/compute_in_memory/memory_cells/rram_isaac_isca_2016.yaml +64 -0
  178. examples/arches/compute_in_memory/memory_cells/rram_neurosim_default.yaml +63 -0
  179. examples/arches/compute_in_memory/memory_cells/rram_raella_isca_2023.yaml +70 -0
  180. examples/arches/compute_in_memory/memory_cells/rram_wan_nature_2022.yaml +63 -0
  181. examples/arches/compute_in_memory/memory_cells/sram_colonnade_jssc_2021.yaml +63 -0
  182. examples/arches/compute_in_memory/memory_cells/sram_example.yaml +63 -0
  183. examples/arches/compute_in_memory/memory_cells/sram_jia_jssc_2020.yaml +63 -0
  184. examples/arches/compute_in_memory/memory_cells/sram_sinangil_jssc_2021.yaml +63 -0
  185. examples/arches/compute_in_memory/memory_cells/sram_wang_vlsi_2022.yaml +63 -0
  186. examples/arches/compute_in_memory/wang_vlsi_2022.yaml +289 -0
  187. examples/arches/eyeriss.yaml +68 -0
  188. examples/arches/fanout_variations/at_glb.yaml +31 -0
  189. examples/arches/fanout_variations/at_glb_with_fanout_node.yaml +34 -0
  190. examples/arches/fanout_variations/at_mac.yaml +31 -0
  191. examples/arches/fanout_variations/at_mac_with_constraints.yaml +38 -0
  192. examples/arches/fanout_variations/at_mac_with_fanout_node.yaml +34 -0
  193. examples/arches/nvdla.yaml +47 -0
  194. examples/arches/simple.yaml +28 -0
  195. examples/arches/tpu_v4i.yaml +67 -0
  196. examples/mappings/unfused_matmuls_to_simple.yaml +33 -0
  197. examples/misc/component_annotated.yaml +33 -0
  198. examples/workloads/gpt3_6.7B.yaml +124 -0
  199. examples/workloads/matmuls.yaml +20 -0
  200. examples/workloads/mobilenet_28.yaml +81 -0
  201. examples/workloads/mobilenet_various_separate.yaml +106 -0
  202. examples/workloads/three_matmuls_annotated.yaml +59 -0
  203. notebooks/.ipynb_checkpoints/fastfusion_arch_study_michael-checkpoint.ipynb +359 -0
  204. notebooks/compute_in_memory/_scripts.py +339 -0
  205. notebooks/compute_in_memory/isaac.guide.ipynb +270 -0
  206. notebooks/compute_in_memory/wang_vlsi_2022.ipynb +602 -0
  207. notebooks/paths.py +4 -0
  208. notebooks/tutorials/.ipynb_checkpoints/1_FFM-checkpoint.ipynb +3110 -0
  209. notebooks/tutorials/FFM.ipynb +3498 -0
  210. notebooks/tutorials/_include.py +48 -0
  211. notebooks/tutorials/component_energy_area.ipynb +363 -0
  212. tests/Q_mapping.yaml +38 -0
  213. tests/__init__.py +0 -0
  214. tests/conv.mapping.yaml +27 -0
  215. tests/conv.workload.yaml +13 -0
  216. tests/conv_sym.mapping.yaml +43 -0
  217. tests/copy.mapping.yaml +35 -0
  218. tests/copy.workload.yaml +15 -0
  219. tests/distribuffers/__init__.py +0 -0
  220. tests/distribuffers/multicast/test_cases.yaml +482 -0
  221. tests/distribuffers/spec/binding/valid_bindings.yaml +97 -0
  222. tests/distribuffers/spec/distributed.yaml +100 -0
  223. tests/distribuffers/spec/logical_arch.yaml +32 -0
  224. tests/distribuffers/spec/physical_arch.yaml +69 -0
  225. tests/distribuffers/test_binding.py +48 -0
  226. tests/frontend/__init__.py +0 -0
  227. tests/frontend/test_mapping_viz.py +52 -0
  228. tests/mapper/__init__.py +0 -0
  229. tests/mapper/configs/conv1d/conv1d.mapping.yaml +31 -0
  230. tests/mapper/configs/conv1d/conv1d.workload.yaml +11 -0
  231. tests/mapper/configs/two_conv1d/two_conv1d.expected.yaml +38 -0
  232. tests/mapper/configs/two_conv1d/two_conv1d.mapping.yaml +54 -0
  233. tests/mapper/configs/two_conv1d/two_conv1d.workload.yaml +19 -0
  234. tests/mapper/test_mapping_to_isl.py +90 -0
  235. tests/mapper/test_spatial_reuse_analysis.py +67 -0
  236. tests/mapper/test_temporal_reuse_analysis.py +56 -0
  237. tests/mapper/util.py +58 -0
  238. tests/matmul.mapping.yaml +29 -0
  239. tests/matmul.workload.yaml +12 -0
  240. tests/matmul_spatial.mapping.yaml +44 -0
  241. tests/mha.renames.yaml +65 -0
  242. tests/mha.workload.yaml +67 -0
  243. tests/mha.yaml +59 -0
  244. tests/mha_full.workload.yaml +67 -0
  245. tests/mobilenet.workload.yaml +35 -0
  246. tests/mobilenet_long.workload.yaml +64 -0
  247. tests/pmappingcache.py +24 -0
  248. tests/processing_stage.arch.yaml +40 -0
  249. tests/snowcat.arch.yaml +36 -0
  250. tests/test_ffm_join_pmappings.py +106 -0
  251. tests/test_ffm_make_pmappings.py +82 -0
  252. tests/test_ffm_make_tile_shapes.py +49 -0
  253. tests/test_mapper.py +100 -0
  254. tests/test_model.py +37 -0
  255. tests/test_plotting.py +72 -0
  256. tests/test_processing_stage.py +46 -0
  257. tests/test_symbolic_model.py +248 -0
  258. tests/test_workload.py +141 -0
accelforge/mapper/FFM/_make_pmappings/make_pmappings.py
@@ -0,0 +1,411 @@
+ import logging
+ from math import prod
+
+ from typing import Callable, Optional
+ import uuid
+ import copy
+
+ from joblib import delayed
+ from tqdm import tqdm
+
+
+ from accelforge.frontend import arch
+ from accelforge.frontend.spec import Spec
+ from accelforge.frontend.mapping import Loop, Mapping, TensorHolder
+ from accelforge.frontend._workload_isl._isl import (
+     get_rank_variable_bounds,
+     get_tensor_size,
+     get_operation_space_size,
+ )
+ from accelforge.frontend.workload import EinsumName, SymbolTable, TensorName
+
+ from accelforge.mapper.FFM._make_pmappings.make_pmapping_templates import (
+     make_pmapping_templates,
+ )
+ from accelforge.frontend.mapper.metrics import Metrics
+ from accelforge.mapper.FFM._make_pmappings.make_pmappings_from_templates import (
+     make_pmappings_from_templates,
+ )
+ from accelforge.mapper.FFM._join_pmappings.compatibility import Compatibility
+ from accelforge.mapper.FFM._join_pmappings.pmapping_group import PmappingGroup
+ from accelforge.util.parallel import (
+     parallel,
+     _memmap_read,
+     get_n_parallel_jobs,
+     is_using_parallel_processing,
+ )
+ from accelforge.mapper.FFM._make_pmappings.pmapper_job import (
+     Job,
+     SameCompatibilityJobs,
+ )
+
+
+ def get_rank_variable_bounds_for_all_einsums(spec: Spec):
+     rank_variable_bounds = {
+         einsum_name: get_rank_variable_bounds(spec.workload, einsum_name)
+         for einsum_name in spec.workload.einsum_names
+     }
+     result = {}
+     for e1, rv1 in rank_variable_bounds.items():
+         result.update(rv1)
+         for e2, rv2 in rank_variable_bounds.items():
+             for r in set(rv1.keys()) & set(rv2.keys()):
+                 if rv1[r] != rv2[r]:
+                     raise ValueError(
+                         f"Rank variable {r} has different bounds for "
+                         f"einsum {e1} and {e2}: {rv1[r]} and {rv2[r]}"
+                     )
+     return result
+
+
+ def get_num_computes(spec: Spec, einsum_name: EinsumName | None = None) -> int:
+     einsums = [einsum_name] if einsum_name is not None else spec.workload.einsum_names
+     return sum(get_operation_space_size(spec.workload, e) for e in einsums)
+
+
+ def get_per_tensor_size(spec: Spec) -> dict[TensorName, int]:
+     return {
+         tensor: get_tensor_size(spec.workload, tensor)
+         for tensor in spec.workload.tensor_names
+     }
+
+
+ def get_jobs(
+     spec: Spec,
+     metrics: Metrics,
+     einsum_names: list[EinsumName],
+     fail_if_no_pmappings_for_einsum: bool,
+ ) -> dict[EinsumName, dict[Compatibility, SameCompatibilityJobs]]:
+     einsum2jobs = {}
+     fusable_tensors = spec.workload.tensor_names_used_in_multiple_einsums
+     rank_variable_bounds = get_rank_variable_bounds_for_all_einsums(spec)
+
+     einsum2spec: dict[EinsumName, Spec] = {}
+     s = "Getting energy, latency, and leak power for components running "
+     pbar = tqdm(einsum_names, desc=s)
+     for einsum_name in pbar:
+         pbar.set_description(s + einsum_name)
+         einsum2spec[einsum_name] = spec._spec_parse_expressions(
+             einsum_name=einsum_name,
+             _parse_arch=True,
+             _parse_non_arch=False,
+         ).calculate_component_area_energy_latency_leak(
+             einsum_name=einsum_name,
+             area=False,
+         )
+         einsum2spec[einsum_name] = _memmap_read(einsum2spec[einsum_name])
+
+     def make_jobs_for_einsum(einsum_name: EinsumName, spec: Spec):
+         jobs = {}
+         workload_einsum = spec.workload.einsums[einsum_name]
+         for flattened_arch in spec._get_flattened_architecture():
+             # Create jobs for each Einsum
+             job = Job(
+                 spec=spec,
+                 einsum_name=einsum_name,
+                 metrics=metrics,
+                 rank_variable_bounds=rank_variable_bounds,
+                 flattened_arch=_memmap_read(flattened_arch),
+                 job_id=uuid.uuid4(),
+                 fusable_tensors=fusable_tensors & workload_einsum.tensor_names,
+             )
+             for j in make_pmapping_templates(job):
+                 jobs.setdefault(j.compatibility, SameCompatibilityJobs()).append(j)
+
+         return einsum_name, jobs
+
+     for einsum_name, jobs in parallel(
+         [
+             delayed(make_jobs_for_einsum)(einsum_name, spec)
+             for einsum_name, spec in einsum2spec.items()
+         ],
+         pbar="Generating jobs",
+         return_as="generator",
+     ):
+         einsum2jobs.setdefault(einsum_name, {})
+         for compatibility, job_list in jobs.items():
+             einsum2jobs[einsum_name].setdefault(
+                 compatibility, SameCompatibilityJobs()
+             ).extend(job_list)
+
+     if fail_if_no_pmappings_for_einsum:
+         for einsum_name, jobs in einsum2jobs.items():
+             if len(jobs) == 0:
+                 raise ValueError(
+                     f"No pmappings for {einsum_name}. Was the mapspace overconstrained?"
+                 )
+
+     # Derive per-job memory and time limits from the mapper's global budgets.
+     total_jobs = sum(len(jobs) for jobs in einsum2jobs.values())
+     n_procs = get_n_parallel_jobs()
+     memory_limit = min(
+         spec.mapper.ffm.memory_limit,
+         spec.mapper.ffm.memory_limit_per_process / n_procs,
+     )
+     time_limit = min(
+         spec.mapper.ffm.time_limit * n_procs / max(total_jobs, 1),
+         spec.mapper.ffm.time_limit_per_pmapping_template,
+     )
+     for einsum_name, compatibility_jobs in einsum2jobs.items():
+         total_jobs = sum(len(j) for j in compatibility_jobs.values())
+         logging.warning(f"Einsum {einsum_name} has {total_jobs} pmapping templates:")
+         for job_list in compatibility_jobs.values():
+             for job in job_list:
+                 logging.warning(f"\t{job.mapping.compact_str()}")
+                 job.memory_limit = memory_limit
+                 job.time_limit = time_limit
+
+     return einsum2jobs
+
+
+ def get_memories_to_track(
+     spec: Spec,
+     einsum2jobs: dict[EinsumName, list[Job]],
+     metrics: Metrics,
+     can_combine_multiple_runs: bool,
+ ) -> tuple[set[str], list[str], set[str]]:
+     memories_track_all = set()
+     for einsum, jobs in einsum2jobs.items():
+         for job in jobs:
+             memories_track_all.update(
+                 m.name for m in job.flattened_arch if isinstance(m, arch.Memory)
+             )
+
+     memories_track_pmappings_only = []
+     ignored_resources = set()
+
+     # If we're combining the pmappings from multiple runs, we can't conclude anything
+     # about the metrics to track
+     if can_combine_multiple_runs:
+         ignored_resources = memories_track_all
+         return (
+             memories_track_all,
+             memories_track_pmappings_only,
+             ignored_resources,
+         )
+
+     if metrics.RESOURCE_USAGE in metrics:
+         ignored_resources = memories_track_all
+         return (
+             memories_track_all,
+             memories_track_pmappings_only,
+             ignored_resources,
+         )
+
+     tensor_sizes = {}
+     for tensor, size in get_per_tensor_size(spec).items():
+         scale = 1
+         for einsum in spec.workload.einsums_with_tensor(tensor):
+             if einsum.tensor_accesses[tensor].persistent:
+                 scale = max(scale, spec.workload.n_instances * einsum.n_instances)
+         tensor_sizes[tensor] = size * scale
+
+     # If the memory is big enough to hold all the tensors, then we don't need to
+     # consider it
+     for memory in list(memories_track_all):
+         usage = 0
+         for einsum in einsum2jobs.keys():
+             job = einsum2jobs[einsum][0]
+             try:
+                 mem: arch.Memory = job.spec.arch.find(memory)
+             except ValueError:
+                 continue
+             for tensor in spec.workload.einsums[einsum].tensor_names:
+                 if mem.size == 0:
+                     usage = 2  # FAIL
+                 else:
+                     scale = mem.bits_per_value_scale[tensor] / mem.size
+                     usage += tensor_sizes[tensor] * scale
+
+         if usage <= 1:
+             ignored_resources.add(memory)
+             print(
+                 f"Not tracking memory {memory}. It is big enough to hold "
+                 f"every workload tensor that may be stored in it. Max possible "
+                 f"usage: {usage * 100:.2f}%"
+             )
+             memories_track_all.remove(memory)
+
+     # If the memory is below every backing tensor holder node, then we need it for the
+     # pmapping exploration but can drop it immediately
+     for m in list(memories_track_all):
+         must_track = False
+         for jobs in einsum2jobs.values():
+             for job in jobs:
+                 seen = False
+                 for node in job.mapping.nodes:
+                     if isinstance(node, TensorHolder) and node.component == m:
+                         seen = True
+                         if node.persistent:
+                             ignored_resources.add(m)
+                         if node._backing:
+                             must_track = True
+                     if isinstance(node, Loop) and node._fused and seen:
+                         must_track = True
+
+         if not must_track:
+             memories_track_all.remove(m)
+             memories_track_pmappings_only.append(m)
+             print(
+                 f"Not tracking memory {m} across joining stages. It is never "
+                 f"reserved across fused loop iterations."
+             )
+
+     return memories_track_all, memories_track_pmappings_only, ignored_resources
+
+
+ def make_pmappings(
+     spec: Spec,
+     can_combine_multiple_runs: bool,
+     metrics: Metrics = Metrics.ENERGY | Metrics.LATENCY,
+     einsum_names: Optional[list[EinsumName]] = None,
+     fail_if_no_pmappings_for_einsum: bool | None = None,
+ ) -> tuple[
+     dict[EinsumName, list[PmappingGroup]],
+     dict[EinsumName, dict[uuid.UUID, Mapping]],
+     dict[EinsumName, list[Job]],
+ ]:
+     """
+     Explores the pmapspace of `einsum_names` (default: all Einsums in the workload).
+     """
+     spec = copy.deepcopy(spec)
+
+     if einsum_names is None:
+         einsum_names = spec.workload.einsum_names
+
+     if fail_if_no_pmappings_for_einsum is None:
+         fail_if_no_pmappings_for_einsum = not can_combine_multiple_runs
+
+     spec = spec._spec_parse_expressions(
+         _parse_arch=False,
+         _parse_non_arch=True,
+     )
+
+     einsum2jobs = {}
+     new_einsum2jobs = get_jobs(
+         spec,
+         metrics,
+         einsum_names,
+         fail_if_no_pmappings_for_einsum,
+     )
+     _fill_jobs_with_memories_to_track(
+         new_einsum2jobs, spec, metrics, can_combine_multiple_runs
+     )
+     for einsum_name, jobs in new_einsum2jobs.items():
+         einsum2jobs.setdefault(einsum_name, {})
+         for compatibility, job_list in jobs.items():
+             einsum2jobs[einsum_name].setdefault(
+                 compatibility, SameCompatibilityJobs()
+             ).extend(job_list)
+
+     calls = _allocate_jobs(einsum2jobs)
+
+     # Sort the calls by the length of the longest mapping in each job. We get long
+     # poles with the long mappings, so we want to get them done early so we don't
+     # have one or two procs slowing us down at the end.
+     def get_longest_mapping_length(call):
+         j: SameCompatibilityJobs = call[2]["jobs_with_similar_compatibilities"]
+         return max(len(j2.mapping.nodes) for j2 in j)
+
+     calls = sorted(calls, key=get_longest_mapping_length, reverse=True)
+     # # Randomly permute the calls
+     # import random
+     # random.shuffle(calls)
+
+     pmapping_objects = {}
+     pmapping_groups = {einsum_name: [] for einsum_name in spec.workload.einsum_names}
+     return_jobs = {}
+     for (
+         einsum_name,
+         new_pmapping_groups,
+         pmappings,
+         jobs_with_similar_compatibilities,
+     ) in parallel(
+         calls,
+         pbar="Generating pmappings",
+         return_as="generator_unordered",
+     ):
+         pmapping_groups[einsum_name].extend(new_pmapping_groups)
+         pmapping_objects.setdefault(einsum_name, {}).update(pmappings)
+         return_jobs.setdefault(einsum_name, []).extend(
+             jobs_with_similar_compatibilities
+         )
+
+     for einsum_name in list(pmapping_groups.keys()):
+         pmapping_groups[einsum_name] = PmappingGroup.combine_combineable(
+             pmapping_groups[einsum_name],
+             "All",
+             pbar_postfix=f" for {einsum_name}",
+         )
+
+     return pmapping_groups, pmapping_objects, return_jobs
+
+
+ def _raise_error_if_no_pmappings(einsum2jobs):
+     for einsum_name, jobs in einsum2jobs.items():
+         if len(jobs) == 0:
+             raise ValueError(
+                 f"No pmappings for {einsum_name}. Was the mapspace overconstrained?"
+             )
+
+
+ def _allocate_jobs(einsum2jobs):
+     calls = []
+     for einsum_name, jobs in einsum2jobs.items():
+         calls.extend(
+             delayed(make_pmappings_from_templates)(
+                 jobs_with_similar_compatibilities=job_list,
+             )
+             for job_list in jobs.values()
+         )
+
+     split = False
+     if (
+         not split
+         and is_using_parallel_processing()
+         and len(calls) < get_n_parallel_jobs() * 4
+     ):
+         logging.warning(
+             "Insufficient jobs available to utilize available threads. "
+             "Splitting jobs into smaller chunks."
+         )
+         split = True
+
+     if split:
+         calls = []
+         for einsum_name, jobs in einsum2jobs.items():
+             for job_list in jobs.values():
+                 calls.extend(
+                     delayed(make_pmappings_from_templates)(
+                         jobs_with_similar_compatibilities=job,
+                     )
+                     for job in job_list.split()
+                 )
+     return calls
+
+
+ def _fill_jobs_with_memories_to_track(
+     einsum2jobs: dict[EinsumName, dict[Compatibility, SameCompatibilityJobs]],
+     spec,
+     metrics,
+     can_combine_multiple_runs,
+ ):
+     einsum2jobs_flattened = {
+         e: [j for jobs in v.values() for j in jobs] for e, v in einsum2jobs.items()
+     }
+
+     memories_track_all, memories_track_pmappings_only, ignored_resources = (
+         get_memories_to_track(
+             spec,
+             einsum2jobs_flattened,
+             metrics,
+             can_combine_multiple_runs,
+         )
+     )
+     for jobs in einsum2jobs_flattened.values():
+         for j in jobs:
+             j.memories_track_all = memories_track_all
+             j.memories_track_pmappings_only = memories_track_pmappings_only
+             j.ignored_resources = ignored_resources
accelforge/mapper/FFM/_make_pmappings/make_pmappings_from_templates/__init__.py
@@ -0,0 +1 @@
+ from .make_pmappings_from_templates import make_pmappings_from_templates
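
For orientation, the sketch below shows roughly how the new make_pmappings entry point could be driven, based only on the signatures and import paths visible in this diff. The Spec construction is a placeholder (this diff does not show how a Spec is built); treat the whole snippet as an illustrative assumption rather than documented usage.

# Hypothetical usage sketch; the Spec value below is a placeholder, not an API shown here.
from accelforge.frontend.mapper.metrics import Metrics
from accelforge.mapper.FFM._make_pmappings.make_pmappings import make_pmappings

spec = ...  # placeholder: a fully built accelforge.frontend.spec.Spec (arch, workload, mapper config)

# Per the annotated return type: pmapping groups per Einsum, pmapping objects keyed
# by UUID, and the jobs that produced them.
pmapping_groups, pmapping_objects, jobs = make_pmappings(
    spec,
    can_combine_multiple_runs=False,
    metrics=Metrics.ENERGY | Metrics.LATENCY,
)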