accelforge 0.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (258)
  1. accelforge/__init__.py +21 -0
  2. accelforge/_accelerated_imports.py +16 -0
  3. accelforge/_deprecate/_simanneal/evalmapping.py +271 -0
  4. accelforge/_deprecate/_simanneal/mapspaceglobals.py +298 -0
  5. accelforge/_deprecate/_simanneal/simanneal.py +666 -0
  6. accelforge/_deprecate/_simanneal/tracking.py +105 -0
  7. accelforge/_deprecate/_simanneal/wrappers.py +218 -0
  8. accelforge/_deprecate/_simanneal2/__init__.py +7 -0
  9. accelforge/_deprecate/_simanneal2/simanneal.py +493 -0
  10. accelforge/_deprecate/_simanneal2/tracking.py +116 -0
  11. accelforge/_deprecate/compatibility_util.py +181 -0
  12. accelforge/_deprecate/layerdeduplication/__init__.py +2 -0
  13. accelforge/_deprecate/layerdeduplication/group_similar_einsums.py +160 -0
  14. accelforge/_deprecate/layerdeduplication/grouped_einsums.py +84 -0
  15. accelforge/_deprecate/mapping_filter_tags/__init__.py +2 -0
  16. accelforge/_deprecate/mapping_filter_tags/ffmt.py +212 -0
  17. accelforge/_deprecate/mapping_filter_tags/onesplit.py +24 -0
  18. accelforge/_deprecate/mapping_filter_tags/util.py +24 -0
  19. accelforge/_deprecate/tags.py +69 -0
  20. accelforge/_deprecate/viz/__init__.py +0 -0
  21. accelforge/_deprecate/viz/interactive.py +159 -0
  22. accelforge/_deprecate/viz/reservationtree.py +307 -0
  23. accelforge/_deprecate/viz/ski_slope.py +88 -0
  24. accelforge/_version.py +15 -0
  25. accelforge/examples.py +39 -0
  26. accelforge/frontend/__init__.py +10 -0
  27. accelforge/frontend/_binding.py +129 -0
  28. accelforge/frontend/_workload_isl/__init__.py +2 -0
  29. accelforge/frontend/_workload_isl/_isl.py +149 -0
  30. accelforge/frontend/_workload_isl/_symbolic.py +141 -0
  31. accelforge/frontend/arch copy.py +1544 -0
  32. accelforge/frontend/arch.py +1642 -0
  33. accelforge/frontend/config.py +63 -0
  34. accelforge/frontend/mapper/__init__.py +5 -0
  35. accelforge/frontend/mapper/ffm.py +126 -0
  36. accelforge/frontend/mapper/mapper.py +7 -0
  37. accelforge/frontend/mapper/metrics.py +30 -0
  38. accelforge/frontend/mapping/__init__.py +1 -0
  39. accelforge/frontend/mapping/mapping.py +1736 -0
  40. accelforge/frontend/model.py +14 -0
  41. accelforge/frontend/renames.py +150 -0
  42. accelforge/frontend/spec copy.py +230 -0
  43. accelforge/frontend/spec.py +301 -0
  44. accelforge/frontend/variables.py +12 -0
  45. accelforge/frontend/workload.py +952 -0
  46. accelforge/mapper/FFM/__init__.py +9 -0
  47. accelforge/mapper/FFM/_join_pmappings/__init__.py +0 -0
  48. accelforge/mapper/FFM/_join_pmappings/compatibility.py +653 -0
  49. accelforge/mapper/FFM/_join_pmappings/compress_pmappings.py +140 -0
  50. accelforge/mapper/FFM/_join_pmappings/join_pmappings.py +703 -0
  51. accelforge/mapper/FFM/_join_pmappings/pmapping_dataframe.py +901 -0
  52. accelforge/mapper/FFM/_join_pmappings/pmapping_group.py +337 -0
  53. accelforge/mapper/FFM/_make_pmappings/contraints/__init__.py +0 -0
  54. accelforge/mapper/FFM/_make_pmappings/contraints/constraints.py +360 -0
  55. accelforge/mapper/FFM/_make_pmappings/make_pmapping_templates/__init__.py +1 -0
  56. accelforge/mapper/FFM/_make_pmappings/make_pmapping_templates/make_loops.py +373 -0
  57. accelforge/mapper/FFM/_make_pmappings/make_pmapping_templates/make_pmapping_templates.py +463 -0
  58. accelforge/mapper/FFM/_make_pmappings/make_pmapping_templates/make_reservations.py +95 -0
  59. accelforge/mapper/FFM/_make_pmappings/make_pmapping_templates/make_storage_order.py +382 -0
  60. accelforge/mapper/FFM/_make_pmappings/make_pmapping_templates/make_storages.py +155 -0
  61. accelforge/mapper/FFM/_make_pmappings/make_pmappings.py +411 -0
  62. accelforge/mapper/FFM/_make_pmappings/make_pmappings_from_templates/__init__.py +1 -0
  63. accelforge/mapper/FFM/_make_pmappings/make_pmappings_from_templates/make_pmappings_from_templates.py +407 -0
  64. accelforge/mapper/FFM/_make_pmappings/make_pmappings_from_templates/make_tile_shapes.py +1681 -0
  65. accelforge/mapper/FFM/_make_pmappings/make_pmappings_from_templates/run_model.py +170 -0
  66. accelforge/mapper/FFM/_make_pmappings/make_pmappings_from_templates/symbol_relations.py +174 -0
  67. accelforge/mapper/FFM/_make_pmappings/pmapper_job.py +282 -0
  68. accelforge/mapper/FFM/_pareto_df/df_convention.py +273 -0
  69. accelforge/mapper/FFM/_pareto_df/pareto copy.py +836 -0
  70. accelforge/mapper/FFM/_pareto_df/pareto.py +508 -0
  71. accelforge/mapper/FFM/data.py +61 -0
  72. accelforge/mapper/FFM/main copy.py +236 -0
  73. accelforge/mapper/FFM/main.py +208 -0
  74. accelforge/mapper/FFM/mappings.py +510 -0
  75. accelforge/mapper/FFM/pmappings.py +310 -0
  76. accelforge/mapper/__init__.py +4 -0
  77. accelforge/mapper.py +0 -0
  78. accelforge/model/__init__.py +1 -0
  79. accelforge/model/_looptree/__init__.py +0 -0
  80. accelforge/model/_looptree/accesses.py +335 -0
  81. accelforge/model/_looptree/capacity/__init__.py +1 -0
  82. accelforge/model/_looptree/capacity/aggregators.py +36 -0
  83. accelforge/model/_looptree/capacity/capacity.py +47 -0
  84. accelforge/model/_looptree/energy.py +150 -0
  85. accelforge/model/_looptree/equivalent_ranks.py +29 -0
  86. accelforge/model/_looptree/latency/__init__.py +1 -0
  87. accelforge/model/_looptree/latency/latency.py +98 -0
  88. accelforge/model/_looptree/latency/memory.py +120 -0
  89. accelforge/model/_looptree/latency/processors.py +92 -0
  90. accelforge/model/_looptree/mapping_utilities.py +71 -0
  91. accelforge/model/_looptree/reuse/__init__.py +4 -0
  92. accelforge/model/_looptree/reuse/isl/__init__.py +1 -0
  93. accelforge/model/_looptree/reuse/isl/des.py +59 -0
  94. accelforge/model/_looptree/reuse/isl/isl_functions.py +374 -0
  95. accelforge/model/_looptree/reuse/isl/mapping_to_isl/__init__.py +4 -0
  96. accelforge/model/_looptree/reuse/isl/mapping_to_isl/analyze_mapping.py +297 -0
  97. accelforge/model/_looptree/reuse/isl/mapping_to_isl/skews_from_mapping.py +236 -0
  98. accelforge/model/_looptree/reuse/isl/mapping_to_isl/tiling.py +685 -0
  99. accelforge/model/_looptree/reuse/isl/mapping_to_isl/types.py +188 -0
  100. accelforge/model/_looptree/reuse/isl/spatial.py +260 -0
  101. accelforge/model/_looptree/reuse/isl/temporal.py +182 -0
  102. accelforge/model/_looptree/reuse/symbolic/__init__.py +1 -0
  103. accelforge/model/_looptree/reuse/symbolic/symbolic copy 2.py +1346 -0
  104. accelforge/model/_looptree/reuse/symbolic/symbolic copy.py +1408 -0
  105. accelforge/model/_looptree/reuse/symbolic/symbolic.py +1396 -0
  106. accelforge/model/_looptree/run.py +122 -0
  107. accelforge/model/_looptree/types.py +26 -0
  108. accelforge/model/_looptree/visualization/__init__.py +0 -0
  109. accelforge/model/_looptree/visualization/occupancy.py +11 -0
  110. accelforge/model/main.py +222 -0
  111. accelforge/plotting/__init__.py +2 -0
  112. accelforge/plotting/mappings.py +219 -0
  113. accelforge/plotting/specs.py +57 -0
  114. accelforge/util/__init__.py +4 -0
  115. accelforge/util/_base_analysis_types.py +24 -0
  116. accelforge/util/_basetypes.py +1089 -0
  117. accelforge/util/_frozenset.py +36 -0
  118. accelforge/util/_isl.py +29 -0
  119. accelforge/util/_itertools.py +14 -0
  120. accelforge/util/_mathfuncs.py +57 -0
  121. accelforge/util/_parse_expressions.py +339 -0
  122. accelforge/util/_picklecache.py +32 -0
  123. accelforge/util/_setexpressions.py +268 -0
  124. accelforge/util/_sympy/__init__.py +0 -0
  125. accelforge/util/_sympy/broadcast_max.py +18 -0
  126. accelforge/util/_visualization.py +112 -0
  127. accelforge/util/_yaml.py +579 -0
  128. accelforge/util/parallel.py +193 -0
  129. accelforge-0.0.1.dist-info/METADATA +64 -0
  130. accelforge-0.0.1.dist-info/RECORD +258 -0
  131. accelforge-0.0.1.dist-info/WHEEL +5 -0
  132. accelforge-0.0.1.dist-info/licenses/LICENSE +19 -0
  133. accelforge-0.0.1.dist-info/top_level.txt +5 -0
  134. docs/_build/html/_sources/fastfusion.frontend.mapper.rst.txt +37 -0
  135. docs/_build/html/_sources/fastfusion.frontend.rst.txt +70 -0
  136. docs/_build/html/_sources/fastfusion.frontend.workload.rst.txt +21 -0
  137. docs/_build/html/_sources/fastfusion.mapper.FFM.rst.txt +37 -0
  138. docs/_build/html/_sources/fastfusion.mapper.rst.txt +18 -0
  139. docs/_build/html/_sources/fastfusion.rst.txt +20 -0
  140. docs/_build/html/_sources/fastfusion.util.rst.txt +21 -0
  141. docs/_build/html/_sources/index.rst.txt +87 -0
  142. docs/_build/html/_sources/modules.rst.txt +7 -0
  143. docs/_build/html/_sources/notes/citation.rst.txt +45 -0
  144. docs/_build/html/_sources/notes/definitions.rst.txt +43 -0
  145. docs/_build/html/_sources/notes/faqs.rst.txt +39 -0
  146. docs/_build/html/_sources/notes/modeling/accelerator_energy_latency.rst.txt +72 -0
  147. docs/_build/html/_sources/notes/modeling/component_energy_area.rst.txt +96 -0
  148. docs/_build/html/_sources/notes/modeling/mapping.rst.txt +100 -0
  149. docs/_build/html/_sources/notes/modeling.rst.txt +33 -0
  150. docs/_build/html/_sources/notes/parsing/arithmetic_parsing.rst.txt +136 -0
  151. docs/_build/html/_sources/notes/parsing/setexpressions.rst.txt +63 -0
  152. docs/_build/html/_sources/notes/parsing/yaml_parsing.rst.txt +176 -0
  153. docs/_build/html/_sources/notes/quickstart_and_installation.rst.txt +9 -0
  154. docs/_build/html/_sources/notes/spec/architecture.rst.txt +133 -0
  155. docs/_build/html/_sources/notes/spec/mapping.rst.txt +12 -0
  156. docs/_build/html/_sources/notes/spec/workload.rst.txt +83 -0
  157. docs/_build/html/_sources/notes/spec.rst.txt +36 -0
  158. docs/source/_ext/include_attrs.py +213 -0
  159. docs/source/_ext/include_docstring.py +364 -0
  160. docs/source/_ext/include_functions.py +154 -0
  161. docs/source/_ext/include_notebook.py +131 -0
  162. docs/source/_ext/include_yaml.py +119 -0
  163. docs/source/_ext/inherited_attributes.py +222 -0
  164. docs/source/_ext/paths.py +4 -0
  165. docs/source/conf.py +79 -0
  166. examples/arches/compute_in_memory/_include.yaml +74 -0
  167. examples/arches/compute_in_memory/_include_functions.py +229 -0
  168. examples/arches/compute_in_memory/_load_spec.py +57 -0
  169. examples/arches/compute_in_memory/components/c2c_multiplier.py +181 -0
  170. examples/arches/compute_in_memory/components/dac_c2c_r2r.py +605 -0
  171. examples/arches/compute_in_memory/components/misc.py +195 -0
  172. examples/arches/compute_in_memory/components/util/bit_functions.py +51 -0
  173. examples/arches/compute_in_memory/components/zero_comparator.py +92 -0
  174. examples/arches/compute_in_memory/isaac.yaml +233 -0
  175. examples/arches/compute_in_memory/memory_cells/ecram_demo.yaml +63 -0
  176. examples/arches/compute_in_memory/memory_cells/rram_example.yaml +63 -0
  177. examples/arches/compute_in_memory/memory_cells/rram_isaac_isca_2016.yaml +64 -0
  178. examples/arches/compute_in_memory/memory_cells/rram_neurosim_default.yaml +63 -0
  179. examples/arches/compute_in_memory/memory_cells/rram_raella_isca_2023.yaml +70 -0
  180. examples/arches/compute_in_memory/memory_cells/rram_wan_nature_2022.yaml +63 -0
  181. examples/arches/compute_in_memory/memory_cells/sram_colonnade_jssc_2021.yaml +63 -0
  182. examples/arches/compute_in_memory/memory_cells/sram_example.yaml +63 -0
  183. examples/arches/compute_in_memory/memory_cells/sram_jia_jssc_2020.yaml +63 -0
  184. examples/arches/compute_in_memory/memory_cells/sram_sinangil_jssc_2021.yaml +63 -0
  185. examples/arches/compute_in_memory/memory_cells/sram_wang_vlsi_2022.yaml +63 -0
  186. examples/arches/compute_in_memory/wang_vlsi_2022.yaml +289 -0
  187. examples/arches/eyeriss.yaml +68 -0
  188. examples/arches/fanout_variations/at_glb.yaml +31 -0
  189. examples/arches/fanout_variations/at_glb_with_fanout_node.yaml +34 -0
  190. examples/arches/fanout_variations/at_mac.yaml +31 -0
  191. examples/arches/fanout_variations/at_mac_with_constraints.yaml +38 -0
  192. examples/arches/fanout_variations/at_mac_with_fanout_node.yaml +34 -0
  193. examples/arches/nvdla.yaml +47 -0
  194. examples/arches/simple.yaml +28 -0
  195. examples/arches/tpu_v4i.yaml +67 -0
  196. examples/mappings/unfused_matmuls_to_simple.yaml +33 -0
  197. examples/misc/component_annotated.yaml +33 -0
  198. examples/workloads/gpt3_6.7B.yaml +124 -0
  199. examples/workloads/matmuls.yaml +20 -0
  200. examples/workloads/mobilenet_28.yaml +81 -0
  201. examples/workloads/mobilenet_various_separate.yaml +106 -0
  202. examples/workloads/three_matmuls_annotated.yaml +59 -0
  203. notebooks/.ipynb_checkpoints/fastfusion_arch_study_michael-checkpoint.ipynb +359 -0
  204. notebooks/compute_in_memory/_scripts.py +339 -0
  205. notebooks/compute_in_memory/isaac.guide.ipynb +270 -0
  206. notebooks/compute_in_memory/wang_vlsi_2022.ipynb +602 -0
  207. notebooks/paths.py +4 -0
  208. notebooks/tutorials/.ipynb_checkpoints/1_FFM-checkpoint.ipynb +3110 -0
  209. notebooks/tutorials/FFM.ipynb +3498 -0
  210. notebooks/tutorials/_include.py +48 -0
  211. notebooks/tutorials/component_energy_area.ipynb +363 -0
  212. tests/Q_mapping.yaml +38 -0
  213. tests/__init__.py +0 -0
  214. tests/conv.mapping.yaml +27 -0
  215. tests/conv.workload.yaml +13 -0
  216. tests/conv_sym.mapping.yaml +43 -0
  217. tests/copy.mapping.yaml +35 -0
  218. tests/copy.workload.yaml +15 -0
  219. tests/distribuffers/__init__.py +0 -0
  220. tests/distribuffers/multicast/test_cases.yaml +482 -0
  221. tests/distribuffers/spec/binding/valid_bindings.yaml +97 -0
  222. tests/distribuffers/spec/distributed.yaml +100 -0
  223. tests/distribuffers/spec/logical_arch.yaml +32 -0
  224. tests/distribuffers/spec/physical_arch.yaml +69 -0
  225. tests/distribuffers/test_binding.py +48 -0
  226. tests/frontend/__init__.py +0 -0
  227. tests/frontend/test_mapping_viz.py +52 -0
  228. tests/mapper/__init__.py +0 -0
  229. tests/mapper/configs/conv1d/conv1d.mapping.yaml +31 -0
  230. tests/mapper/configs/conv1d/conv1d.workload.yaml +11 -0
  231. tests/mapper/configs/two_conv1d/two_conv1d.expected.yaml +38 -0
  232. tests/mapper/configs/two_conv1d/two_conv1d.mapping.yaml +54 -0
  233. tests/mapper/configs/two_conv1d/two_conv1d.workload.yaml +19 -0
  234. tests/mapper/test_mapping_to_isl.py +90 -0
  235. tests/mapper/test_spatial_reuse_analysis.py +67 -0
  236. tests/mapper/test_temporal_reuse_analysis.py +56 -0
  237. tests/mapper/util.py +58 -0
  238. tests/matmul.mapping.yaml +29 -0
  239. tests/matmul.workload.yaml +12 -0
  240. tests/matmul_spatial.mapping.yaml +44 -0
  241. tests/mha.renames.yaml +65 -0
  242. tests/mha.workload.yaml +67 -0
  243. tests/mha.yaml +59 -0
  244. tests/mha_full.workload.yaml +67 -0
  245. tests/mobilenet.workload.yaml +35 -0
  246. tests/mobilenet_long.workload.yaml +64 -0
  247. tests/pmappingcache.py +24 -0
  248. tests/processing_stage.arch.yaml +40 -0
  249. tests/snowcat.arch.yaml +36 -0
  250. tests/test_ffm_join_pmappings.py +106 -0
  251. tests/test_ffm_make_pmappings.py +82 -0
  252. tests/test_ffm_make_tile_shapes.py +49 -0
  253. tests/test_mapper.py +100 -0
  254. tests/test_model.py +37 -0
  255. tests/test_plotting.py +72 -0
  256. tests/test_processing_stage.py +46 -0
  257. tests/test_symbolic_model.py +248 -0
  258. tests/test_workload.py +141 -0
@@ -0,0 +1,310 @@
1
+ import copy
2
+ from typing import Callable
3
+ from uuid import UUID
4
+ from accelforge.frontend import arch
5
+ from accelforge.frontend.spec import Spec
6
+ from accelforge.mapper.FFM._join_pmappings.pmapping_group import PmappingGroup
7
+ from accelforge.frontend.workload import EinsumName
8
+ from accelforge.frontend.mapping import Mapping
9
+ from accelforge.mapper.FFM._make_pmappings.pmapper_job import Job
10
+
11
+
12
class MultiEinsumPmappings:
    """
    A collection of pmappings for each Einsum in a workload, generated by
    :func:`~accelforge.mapper.FFM.make_pmappings`.

    Parameters
    ----------
    einsum2pmappings:
        A dictionary of Einsum names to lists of PmappingGroups. PmappingGroups contain
        the Pareto-optimal pmappings for the Einsum.
    pmapping_objects:
        A dictionary of Einsum names to dictionaries of UUIDs to Mappings. The entries
        in the PmappingGroup objects reference these pmapping objects.
    einsum2jobs:
        A dictionary of Einsum names to lists of Jobs that generated the pmappings.
    can_combine_multiple_runs:
        Whether the pmappings can be combined from multiple runs. If this is True, then
        multiple make_pmappings calls can be summed together to get a single
        MultiEinsumPmappings object. If this is True, the mapper may run more slowly.
    einsums_with_pmappings_generated:
        Einsums for which pmappings were generated (or attempted to be generated).
    flattened_arches:
        A dictionary of (EinsumName, Compute Name) to lists of architecture nodes. These
        contain the parsed and flattened architecture node for that particular Einsum
        and compute combination.
    parsed_specs:
        A dictionary of Einsum names to parsed specifications. These contain the parsed
        specification for that particular Einsum.
    """

    def __init__(
        self,
        einsum2pmappings: dict[EinsumName, list[PmappingGroup]],
        pmapping_objects: dict[EinsumName, dict[UUID, Mapping]],
        einsum2jobs: dict[EinsumName, list[Job]],
        can_combine_multiple_runs: bool,
        einsums_with_pmappings_generated: set[EinsumName],
        flattened_arches: dict[tuple[EinsumName, str], list[arch.Leaf]],
        parsed_specs: dict[EinsumName, Spec],
    ):
        self.einsum2pmappings: dict[EinsumName, list[PmappingGroup]] = einsum2pmappings
        self.pmapping_objects: dict[EinsumName, dict[UUID, Mapping]] = pmapping_objects
        self.einsum2jobs: dict[EinsumName, list[Job]] = einsum2jobs
        self.can_combine_multiple_runs: bool = can_combine_multiple_runs
        self.einsums_with_pmappings_generated: set[EinsumName] = (
            einsums_with_pmappings_generated
        )
        # Keyed by (EinsumName, compute name) tuples; see the class docstring.
        self.flattened_arches: dict[tuple[EinsumName, str], list[arch.Leaf]] = (
            flattened_arches
        )
        self.parsed_specs: dict[EinsumName, Spec] = parsed_specs

    def __or__(self, other: "MultiEinsumPmappings") -> "MultiEinsumPmappings":
        """
        Combine the pmappings from two runs into a new ``MultiEinsumPmappings``.

        Neither operand is modified. Raises ``ValueError`` unless both operands
        were created with ``can_combine_multiple_runs=True``.
        """
        if not self.can_combine_multiple_runs or not other.can_combine_multiple_runs:
            raise ValueError(
                "Must call make_pmappings with can_combine_multiple_runs=True to combine pmappings "
                "from multiple runs."
            )
        merged = copy.copy(self)
        # copy.copy alone is shallow: the dict/list attributes would still be
        # shared with `self`, so extending them below would mutate the left
        # operand. Rebuild one level of containers before merging.
        merged.einsum2pmappings = {
            einsum: list(groups) for einsum, groups in self.einsum2pmappings.items()
        }
        merged.einsum2jobs = {
            einsum: list(jobs) for einsum, jobs in self.einsum2jobs.items()
        }
        merged.pmapping_objects = dict(self.pmapping_objects)
        merged.einsums_with_pmappings_generated = set(
            self.einsums_with_pmappings_generated
        )
        merged.parsed_specs = dict(self.parsed_specs)
        merged.flattened_arches = dict(self.flattened_arches)

        for einsum_name, pmappings in other.einsum2pmappings.items():
            merged.einsum2pmappings.setdefault(einsum_name, []).extend(pmappings)
        for einsum_name, jobs in other.einsum2jobs.items():
            merged.einsum2jobs.setdefault(einsum_name, []).extend(jobs)
        merged.pmapping_objects.update(other.pmapping_objects)
        merged.einsums_with_pmappings_generated.update(
            other.einsums_with_pmappings_generated
        )
        merged.parsed_specs.update(other.parsed_specs)
        merged.flattened_arches.update(other.flattened_arches)
        return merged

    def _filter(
        self,
        filter_lambda: Callable[[PmappingGroup], bool],
        einsums_with_pmappings_generated: list[EinsumName] | None = None,
    ):
        """
        Return a new ``MultiEinsumPmappings`` keeping only the PmappingGroups
        for which ``filter_lambda`` returns True. If
        ``einsums_with_pmappings_generated`` is None, all Einsums are filtered.
        The other attributes are shared with ``self``, not copied.
        """
        new_einsum2pmappings = {}
        if einsums_with_pmappings_generated is None:
            einsums_with_pmappings_generated = list(self.einsum2pmappings.keys())
        for einsum_name in einsums_with_pmappings_generated:
            new_einsum2pmappings[einsum_name] = [
                pm for pm in self.einsum2pmappings[einsum_name] if filter_lambda(pm)
            ]

        return MultiEinsumPmappings(
            einsum2pmappings=new_einsum2pmappings,
            pmapping_objects=self.pmapping_objects,
            einsum2jobs=self.einsum2jobs,
            can_combine_multiple_runs=self.can_combine_multiple_runs,
            einsums_with_pmappings_generated=self.einsums_with_pmappings_generated,
            flattened_arches=self.flattened_arches,
            parsed_specs=self.parsed_specs,
        )

    def drop_einsums(self, *einsums_with_pmappings_generated: EinsumName):
        """
        Removes all pmappings for the given Einsums.

        Note that ``parsed_specs`` and ``flattened_arches`` entries for the
        dropped Einsums are intentionally retained.

        Parameters
        ----------
        einsums_with_pmappings_generated:
            The Einsums for which to remove pmappings.
        """
        for einsum_name in einsums_with_pmappings_generated:
            del self.einsum2pmappings[einsum_name]
            del self.pmapping_objects[einsum_name]
            del self.einsum2jobs[einsum_name]
            self.einsums_with_pmappings_generated.remove(einsum_name)

    def pmapping_keep_rates(
        self, per_einsum: bool = False
    ) -> dict[EinsumName, dict[str, float]] | dict[str, float]:
        """
        Returns the keep rates for each cause of pmapping removal. For example, if only
        25% of the pmappings have a valid spatial fanout, then the keep rate for the
        spatial fanout cause will be 0.25.

        Parameters
        ----------
        per_einsum:
            If True, returns a dictionary of keep rates for each Einsum.

        Returns
        -------
        A dictionary of keep rates for each cause of pmapping removal.
        """
        result = {}
        einsum2npmappings = self.n_total_pmappings(per_einsum=True)

        # Weight each job's keep rate by the size of its pmapping space so the
        # final rates are averages over pmappings, not over jobs.
        for einsum_name, jobs in self.einsum2jobs.items():
            cur_result = result.setdefault(einsum_name, {})
            for job in jobs:
                for cause, keep_rate in job.pmapping_keep_rates.items():
                    cur_result.setdefault(cause, 0)
                    cur_result[cause] += job.n_total_pmappings * keep_rate

        if per_einsum:
            for einsum_name, npmappings in einsum2npmappings.items():
                for cause, keep_rate in result[einsum_name].items():
                    result[einsum_name][cause] = keep_rate / npmappings
        else:
            new_result = {}
            n_total_pmappings = sum(einsum2npmappings.values())
            for einsum_name, keep_rates in result.items():
                for cause, keep_rate in keep_rates.items():
                    new_result.setdefault(cause, 0)
                    new_result[cause] += keep_rate / n_total_pmappings
            result = new_result

        return result

    def n_total_pmappings(
        self, per_einsum: bool = False
    ) -> int | dict[EinsumName, int]:
        """
        Returns the number of total pmappings in the mapspace.

        Parameters
        ----------
        per_einsum:
            If True, returns a dictionary of total pmappings for each Einsum.

        Returns
        -------
        The number of total pmappings in the mapspace.
        """
        result = {
            einsum_name: sum(job.n_total_pmappings for job in jobs)
            for einsum_name, jobs in self.einsum2jobs.items()
        }
        if per_einsum:
            return result
        return sum(result.values())

    def n_valid_pmappings(
        self, per_einsum: bool = False
    ) -> int | dict[EinsumName, int]:
        """
        Returns the number of valid pmappings for each Einsum. A valid pmapping is one
        that satisfies all constraints and resource usage limits.

        Parameters
        ----------
        per_einsum:
            If True, returns a dictionary of valid pmappings for each Einsum.

        Returns
        -------
        The number of valid pmappings in the mapspace.
        """
        result = {
            einsum_name: sum(job.n_valid_pmappings for job in jobs)
            for einsum_name, jobs in self.einsum2jobs.items()
        }
        if per_einsum:
            return result
        return sum(result.values())

    def n_pareto_optimal_pmappings(
        self, per_einsum: bool = False
    ) -> int | dict[EinsumName, int]:
        """
        Returns the number of Pareto-optimal pmappings for each Einsum. This is the
        number of mappings that will be returned by the make_pmappings function.

        Parameters
        ----------
        per_einsum:
            If True, returns a dictionary of Pareto-optimal pmappings for each Einsum.

        Returns
        -------
        The number of Pareto-optimal pmappings in the mapspace.
        """
        result = {
            einsum_name: sum(len(p) for p in pmappings)
            for einsum_name, pmappings in self.einsum2pmappings.items()
        }
        if per_einsum:
            return result
        return sum(result.values())

    def n_evaluated_pmappings(
        self, per_einsum: bool = False
    ) -> int | dict[EinsumName, int]:
        """
        Returns the number of pmappings that were evaluated for each Einsum. This is
        greater than the number of Pareto-optimal pmappings because some mappings are
        found to be suboptimal after they have been evaluated.

        Parameters
        ----------
        per_einsum:
            If True, returns a dictionary of evaluated pmappings for each Einsum.

        Returns
        -------
        The number of evaluated pmappings in the mapspace.
        """
        result = {
            einsum_name: sum(job.n_evaluated_pmappings for job in jobs)
            for einsum_name, jobs in self.einsum2jobs.items()
        }
        if per_einsum:
            return result
        return sum(result.values())

    def n_pmapping_string(self) -> str:
        """
        Returns a string representation of the number of pmappings in the mapspace.
        Printing this can help diagnose if the mapper is not finding any pmappings or
        mappings.

        Returns
        -------
        A string representation of the number of pmappings in the mapspace.
        """
        # "Total" is used as a synthetic row below, so a real Einsum with that
        # name would be ambiguous.
        if "Total" in self.einsum2pmappings:
            raise ValueError(
                f"Cannot print stats for a MultiEinsumPmappings object that has "
                f"an Einsum named 'Total'. Use a different name for the Einsum."
            )

        n_total_pmappings = self.n_total_pmappings(per_einsum=True)
        n_valid_pmappings = self.n_valid_pmappings(per_einsum=True)
        n_evaluated_pmappings = self.n_evaluated_pmappings(per_einsum=True)
        n_pareto_optimal_pmappings = self.n_pareto_optimal_pmappings(per_einsum=True)

        for x in (
            n_total_pmappings,
            n_valid_pmappings,
            n_evaluated_pmappings,
            n_pareto_optimal_pmappings,
        ):
            x["Total"] = sum(x.values())

        s = []
        for e in n_total_pmappings:
            t = n_total_pmappings[e]
            v = n_valid_pmappings[e]
            ev = n_evaluated_pmappings[e]
            p = n_pareto_optimal_pmappings[e]

            def fmt(x, total: bool = True):
                # Formats a count; when `total` is True, appends the fraction
                # of the total pmapping count `t` that it represents, e.g.
                # "500 (1/8)".
                x = round(x)

                def _f(y):
                    y = round(y)
                    return str(y) if y < 1000 else f"{y:.2e}".replace("e+", "e")

                divved = _f(round(t) / x) if x != 0 else "inf"
                return f"{_f(x)} (1/{divved})" if total else _f(x)

            s.append(
                f"{e}: {fmt(t, False)} total, {fmt(v)} valid, {fmt(ev)} evaluated, "
                f"{fmt(p)} Pareto-Optimal"
            )
        return "\n".join(s)
@@ -0,0 +1,4 @@
1
+ import accelforge.mapper.FFM as FFM
2
+ from accelforge.frontend.mapper.metrics import Metrics
3
+
4
+ # import accelforge.mapper._simanneal2 as simanneal2
accelforge/mapper.py ADDED
File without changes
@@ -0,0 +1 @@
1
+ from accelforge.model.main import evaluate_mapping
File without changes
@@ -0,0 +1,335 @@
1
+ from dataclasses import dataclass
2
+ from typing import Optional, overload
3
+
4
+ # from bindings.looptree import TemporalTag, SequentialTag, PipelineTemporalTag
5
+
6
+ import islpy as isl
7
+
8
+ from accelforge.model._looptree.reuse.isl import IslReuseAnalysisOutput
9
+ from accelforge.model._looptree.reuse.symbolic import (
10
+ BuffetStats,
11
+ SymbolicAnalysisOutput,
12
+ )
13
+ from accelforge.model._looptree.mapping_utilities import get_paths, get_leaves
14
+
15
+ from accelforge.frontend.mapping import Mapping, TensorHolder, Compute
16
+ from accelforge.frontend.workload import Workload
17
+
18
+ # from pytimeloop._isl.singular import get_sum_of_pw_qpolynomial
19
+ # from pytimeloop._isl.sum import sum_with_mask
20
+
21
+
22
@dataclass(eq=True)
class Accesses:
    """Mutable tally of accesses for one (buffer, tensor, einsum) combination.

    Instances start zeroed (see ``BufferAccesses.get_accesses``) and are
    accumulated in place with ``+=`` by the access-counting code in this
    module.
    """

    # Reads/writes summed over the whole execution.
    total_reads: float
    total_writes: float
    # Accumulated per-unit maxima; presumably "unit" is one spatial instance
    # of the buffer — TODO(review): confirm.
    max_per_unit_reads: float
    max_per_unit_writes: float
28
+
29
+
30
class BufferAccesses:
    """Container mapping (buffer, dspace, einsum) keys to ``Accesses`` tallies."""

    def __init__(self):
        # One Accesses entry per (buffer, dspace, einsum) key, created lazily.
        self.accesses: dict[tuple, Accesses] = {}

    def get_accesses(self, buffer, dspace, einsum) -> Accesses:
        """Return the tally for (buffer, dspace, einsum), creating a zeroed
        entry on first use."""
        return self.accesses.setdefault((buffer, dspace, einsum), Accesses(0, 0, 0, 0))

    def items(self):
        """Iterate over ((buffer, dspace, einsum), Accesses) pairs."""
        return self.accesses.items()

    def items_with_buffer(self, ref_buffer):
        """Returns iterator similar to `items` but only for `ref_buffer`"""
        for (buffer, dspace, einsum), value in self.accesses.items():
            if buffer == ref_buffer:
                yield (buffer, dspace, einsum), value

    def __str__(self):
        return repr(self.accesses)

    def __repr__(self):
        return f"BufferAccesses({self.accesses!r})"
56
+
57
+
58
@overload
def summarize_total_and_per_unit_actions(
    reuse_analysis_result: IslReuseAnalysisOutput,
) -> dict[tuple, BuffetStats]: ...


@overload
def summarize_total_and_per_unit_actions(
    reuse_analysis_result: SymbolicAnalysisOutput,
) -> dict[tuple, BuffetStats]: ...


def summarize_total_and_per_unit_actions(
    reuse_analysis_result,
) -> dict[tuple, BuffetStats]:
    """
    Summarize a reuse-analysis result as total and max-per-unit buffet actions.

    Parameters
    ----------
    reuse_analysis_result:
        Either an ``IslReuseAnalysisOutput`` or a ``SymbolicAnalysisOutput``.

    Returns
    -------
    A dictionary mapping (level, tensor, einsum) keys to ``BuffetStats``.

    Raises
    ------
    TypeError
        If ``reuse_analysis_result`` is not one of the supported types.
    """
    result = {}
    if isinstance(reuse_analysis_result, IslReuseAnalysisOutput):
        # NOTE(review): the import of get_sum_of_pw_qpolynomial is commented
        # out at the top of this file, so this branch would raise NameError if
        # reached — confirm whether the ISL path is still supported.
        reads_to_parent = reuse_analysis_result.reads_to_parent
        reads_to_peer = reuse_analysis_result.reads_to_peer
        for key, (tags, fill) in reuse_analysis_result.fills.items():
            read_to_parent = reads_to_parent[key][1]
            read_to_peer = reads_to_peer[key][1]

            # Totals over the whole iteration space.
            total_fill = get_sum_of_pw_qpolynomial(fill)
            total_read_to_parent = get_sum_of_pw_qpolynomial(read_to_parent)
            total_read_to_peer = get_sum_of_pw_qpolynomial(read_to_peer)

            # Per-unit maxima: sum over temporal dims, max over spatial dims.
            max_per_unit_fill = _sum_over_temporal_max_over_spatial(tags, fill)

            # read_to_parent may have fewer input dims than `tags` covers.
            n_read_to_parent_dim = read_to_parent.dim(isl.dim_type.in_)
            max_per_unit_read_to_parent = _sum_over_temporal_max_over_spatial(
                tags[:n_read_to_parent_dim], read_to_parent
            )

            max_per_unit_read_to_peer = _sum_over_temporal_max_over_spatial(
                tags, read_to_peer
            )

            # BUG FIX: these stats were previously computed and then discarded,
            # so the ISL branch always returned an empty dict. Store them under
            # `key`, mirroring the symbolic branch below.
            result[key] = BuffetStats(
                total_fills=total_fill,
                total_reads_to_parent=total_read_to_parent,
                total_reads_to_peer=total_read_to_peer,
                max_per_unit_fills=max_per_unit_fill,
                max_per_parent_reads_to_parent=max_per_unit_read_to_parent,
                max_per_unit_reads_to_peer=max_per_unit_read_to_peer,
            )

    elif isinstance(reuse_analysis_result, SymbolicAnalysisOutput):
        # Symbolic analysis already carries per-buffet stats; re-key them.
        for buffet, buffet_stats in reuse_analysis_result.buffet_stats.items():
            key = (buffet.level, buffet.tensor, buffet.einsum)
            result[key] = buffet_stats

    else:
        raise TypeError(
            f"Unsupported reuse analysis result type: {type(reuse_analysis_result)}"
        )

    return result
115
+
116
+
117
# Typing-only overloads: the shared implementation below accepts either an
# ISL-based or a symbolic reuse-analysis result and returns per-buffer
# access counts.
@overload
def isl_buffer_accesses_from_buffet_actions(
    reuse_analysis_result: IslReuseAnalysisOutput, mapping, workload, is_path=False
) -> BufferAccesses:
    pass


@overload
def isl_buffer_accesses_from_buffet_actions(
    reuse_analysis_result: SymbolicAnalysisOutput, mapping, workload, is_path=False
) -> BufferAccesses:
    pass
129
+
130
+
131
# TODO: is_path should be removed and we should accept only regular mappings
def isl_buffer_accesses_from_buffet_actions(
    reuse_analysis_result, mapping, workload: Workload, is_path=False
) -> BufferAccesses:
    """Convert per-buffet fill/read statistics into per-buffer access counts.

    For every (buffer, tensor, einsum) buffet: reads-to-parent are charged to
    the parent buffer (as reads, plus matching writes for output tensors),
    while fills and reads-to-peer are charged to the buffet's own buffer.
    Fills are not charged to compute components (they have no write action)
    nor to top-level (parentless) buffers.

    Args:
        reuse_analysis_result: An IslReuseAnalysisOutput or
            SymbolicAnalysisOutput carrying buffet stats and elidable reads.
        mapping: Mapping object; only its ``.nodes`` tree/path is used.
        workload: Workload providing per-einsum input/output tensor names.
        is_path: If True, ``mapping.nodes`` is a single root-to-leaf path
            rather than a full mapping tree.

    Returns:
        BufferAccesses holding total and max-per-unit reads/writes.

    Raises:
        ValueError: if total reads-to-parent is zero while the per-unit
            maximum is nonzero (inconsistent analysis output).
    """
    mapping = mapping.nodes

    parent_buffers = get_parent_buffers(mapping, workload, is_path)

    # Components that perform compute; fills never write into these.
    compute_targets = set()
    for compute_node in get_leaves(mapping, is_path):
        assert isinstance(compute_node, Compute)
        compute_targets.add(compute_node.component)

    summarized_actions = summarize_total_and_per_unit_actions(reuse_analysis_result)

    accesses_results = BufferAccesses()
    for (buffer_id, tensor, einsum), stats in summarized_actions.items():
        fill = stats.total_fills  # Writes
        read_to_parent = stats.total_reads_to_parent  # Reads to parent
        read_to_peer = stats.total_reads_to_peer  # Reads to peer
        max_per_unit_fill = stats.max_per_unit_fills
        max_per_parent_read_to_parent = stats.max_per_parent_reads_to_parent
        max_per_unit_read_to_peer = stats.max_per_unit_reads_to_peer

        parent_buffer = parent_buffers[(buffer_id, tensor, einsum)]
        if parent_buffer is not None:
            # The parent is the backing store when it has no parent of its own.
            parent_is_backing = parent_buffers[(parent_buffer, tensor, einsum)] is None

            accesses = accesses_results.get_accesses(parent_buffer, tensor, einsum)
            if tensor in workload.einsums[einsum].output_tensor_names:
                # Output tensors bounce: each read from the parent is paired
                # with a write back to it.
                accesses.total_writes += read_to_parent
                accesses.total_reads += read_to_parent

                accesses.max_per_unit_reads += max_per_parent_read_to_parent
                accesses.max_per_unit_writes += max_per_parent_read_to_parent

                if read_to_parent == 0 and max_per_parent_read_to_parent != 0:
                    raise ValueError(
                        f"read_to_parent is 0 but max_per_parent_read_to_parent is {max_per_parent_read_to_parent}"
                    )
                # Bug fix: the guard previously tested max_per_unit_read_to_peer,
                # so read_to_parent == 0 with nonzero peer reads fell into the
                # else branch and divided by zero. The ValueError above already
                # guarantees the numerator is 0 whenever read_to_parent is 0,
                # so testing read_to_parent alone is sufficient.
                if read_to_parent == 0:
                    per_unit_to_total = 0
                else:
                    per_unit_to_total = max_per_parent_read_to_parent / read_to_parent

                # TODO: Do this per unit properly by recursing on first iteration in symbolic.py
                # and passing a flag that says whether this is first iteration
                if parent_is_backing:
                    elidable_reads = reuse_analysis_result.elidable_reads.get(tensor, 0)
                    accesses.total_reads -= elidable_reads
                    accesses.max_per_unit_reads -= per_unit_to_total * elidable_reads

            elif tensor in workload.einsums[einsum].input_tensor_names:
                accesses.total_reads += read_to_parent

                accesses.max_per_unit_reads += max_per_parent_read_to_parent

        # Fills will write into current buffer except for compute (which does
        # not have write action) and top-level buffer
        if buffer_id not in compute_targets and parent_buffer is not None:
            accesses = accesses_results.get_accesses(buffer_id, tensor, einsum)
            if tensor in workload.einsums[einsum].output_tensor_names:
                accesses.total_writes += fill
                accesses.max_per_unit_writes += max_per_unit_fill

                # # TODO: figure out how to do this per unit
                # total_elided_writes = get_tensor_size(workload, tensor)
                # accesses.total_writes -= total_elided_writes
            else:
                accesses.total_writes += fill
                accesses.max_per_unit_writes += max_per_unit_fill

            accesses.total_reads += read_to_peer
            accesses.max_per_unit_reads += max_per_unit_read_to_peer

    return accesses_results
207
+
208
+
209
def get_parent_buffers(mapping: Mapping, workload: Workload, is_path):
    """Map (component, tensor, einsum) to the innermost enclosing buffer
    that also holds the tensor, or None for the outermost holder.

    Walks every root-to-leaf path (or the single path when ``is_path``),
    tracking the innermost TensorHolder seen so far for each tensor.
    Compute leaves are mapped to the holder of each of their einsum's
    input and output tensors.
    """
    parent_buffers = {}
    paths = [mapping] if is_path else get_paths(mapping)

    for path in paths:
        einsum = path[-1].einsum
        # Innermost holder seen so far for each tensor along this path.
        innermost_holder = {}

        for node in path:
            if isinstance(node, TensorHolder):
                for tensor in node.tensors:
                    # None when no outer holder has this tensor yet.
                    parent_buffers[(node.component, tensor, einsum)] = (
                        innermost_holder.get(tensor)
                    )
                    innermost_holder[tensor] = node.component
            elif isinstance(node, Compute):
                names = workload.einsums[einsum]
                for group in (names.input_tensor_names, names.output_tensor_names):
                    for tensor in group:
                        if tensor in innermost_holder:
                            parent_buffers[(node.component, tensor, einsum)] = (
                                innermost_holder[tensor]
                            )

    return parent_buffers
241
+
242
+
243
def _sum_over_temporal_max_over_spatial(tags, actions):
    """Sum ``actions`` over temporal-like dimensions and take the max over
    the remaining (spatial) ones, returning a plain Python value.

    A dimension counts as temporal-like when its tag is a TemporalTag,
    PipelineTemporalTag, or SequentialTag.
    """
    temporal_mask = [
        isinstance(tag, (TemporalTag, PipelineTemporalTag, SequentialTag))
        for tag in tags
    ]
    summed = sum_with_mask(temporal_mask, actions)
    return summed.max().to_python()
259
+
260
+
261
# NOTE(review): this appears to be an accidental byte-for-byte duplicate of the
# function (and its @overload stubs) defined earlier in this module; this later
# definition is the one that takes effect at import time. Confirm and delete
# one of the two copies.
def isl_buffer_accesses_from_buffet_actions(
    reuse_analysis_result, mapping, workload: Workload, is_path=False
) -> BufferAccesses:
    """Convert per-buffet fill/read statistics into per-buffer access counts.

    For every (buffer, tensor, einsum) buffet: reads-to-parent are charged to
    the parent buffer (as reads, plus matching writes for output tensors),
    while fills and reads-to-peer are charged to the buffet's own buffer.
    Fills are not charged to compute components (they have no write action)
    nor to top-level (parentless) buffers.

    Args:
        reuse_analysis_result: An IslReuseAnalysisOutput or
            SymbolicAnalysisOutput carrying buffet stats and elidable reads.
        mapping: Mapping object; only its ``.nodes`` tree/path is used.
        workload: Workload providing per-einsum input/output tensor names.
        is_path: If True, ``mapping.nodes`` is a single root-to-leaf path
            rather than a full mapping tree.

    Returns:
        BufferAccesses holding total and max-per-unit reads/writes.

    Raises:
        ValueError: if total reads-to-parent is zero while the per-unit
            maximum is nonzero (inconsistent analysis output).
    """
    mapping = mapping.nodes

    parent_buffers = get_parent_buffers(mapping, workload, is_path)

    # Components that perform compute; fills never write into these.
    compute_targets = set()
    for compute_node in get_leaves(mapping, is_path):
        assert isinstance(compute_node, Compute)
        compute_targets.add(compute_node.component)

    summarized_actions = summarize_total_and_per_unit_actions(reuse_analysis_result)

    accesses_results = BufferAccesses()
    for (buffer_id, tensor, einsum), stats in summarized_actions.items():
        fill = stats.total_fills  # Writes
        read_to_parent = stats.total_reads_to_parent  # Reads to parent
        read_to_peer = stats.total_reads_to_peer  # Reads to peer
        max_per_unit_fill = stats.max_per_unit_fills
        max_per_parent_read_to_parent = stats.max_per_parent_reads_to_parent
        max_per_unit_read_to_peer = stats.max_per_unit_reads_to_peer

        parent_buffer = parent_buffers[(buffer_id, tensor, einsum)]
        if parent_buffer is not None:
            # The parent is the backing store when it has no parent of its own.
            parent_is_backing = parent_buffers[(parent_buffer, tensor, einsum)] is None

            accesses = accesses_results.get_accesses(parent_buffer, tensor, einsum)
            if tensor in workload.einsums[einsum].output_tensor_names:
                # Output tensors bounce: each read from the parent is paired
                # with a write back to it.
                accesses.total_writes += read_to_parent
                accesses.total_reads += read_to_parent

                accesses.max_per_unit_reads += max_per_parent_read_to_parent
                accesses.max_per_unit_writes += max_per_parent_read_to_parent

                if read_to_parent == 0 and max_per_parent_read_to_parent != 0:
                    raise ValueError(
                        f"read_to_parent is 0 but max_per_parent_read_to_parent is {max_per_parent_read_to_parent}"
                    )
                # Bug fix: the guard previously tested max_per_unit_read_to_peer,
                # so read_to_parent == 0 with nonzero peer reads fell into the
                # else branch and divided by zero. The ValueError above already
                # guarantees the numerator is 0 whenever read_to_parent is 0,
                # so testing read_to_parent alone is sufficient.
                if read_to_parent == 0:
                    per_unit_to_total = 0
                else:
                    per_unit_to_total = max_per_parent_read_to_parent / read_to_parent

                # TODO: Do this per unit properly by recursing on first iteration in symbolic.py
                # and passing a flag that says whether this is first iteration
                if parent_is_backing:
                    elidable_reads = reuse_analysis_result.elidable_reads.get(tensor, 0)
                    accesses.total_reads -= elidable_reads
                    accesses.max_per_unit_reads -= per_unit_to_total * elidable_reads

            elif tensor in workload.einsums[einsum].input_tensor_names:
                accesses.total_reads += read_to_parent

                accesses.max_per_unit_reads += max_per_parent_read_to_parent

        # Fills will write into current buffer except for compute (which does
        # not have write action) and top-level buffer
        if buffer_id not in compute_targets and parent_buffer is not None:
            accesses = accesses_results.get_accesses(buffer_id, tensor, einsum)
            if tensor in workload.einsums[einsum].output_tensor_names:
                accesses.total_writes += fill
                accesses.max_per_unit_writes += max_per_unit_fill

                # # TODO: figure out how to do this per unit
                # total_elided_writes = get_tensor_size(workload, tensor)
                # accesses.total_writes -= total_elided_writes
            else:
                accesses.total_writes += fill
                accesses.max_per_unit_writes += max_per_unit_fill

            accesses.total_reads += read_to_peer
            accesses.max_per_unit_reads += max_per_unit_read_to_peer

    return accesses_results
@@ -0,0 +1 @@
1
+ from .capacity import compute_capacity_usage
@@ -0,0 +1,36 @@
1
def compute_max(child_caps, caps):
    """Aggregate children's capacity usage by taking, for each buffer, the
    maximum usage across children, and add that peak into ``caps``.

    ``child_caps`` is an iterable of {buffer: usage} dicts (one per child);
    ``caps`` is mutated in place.
    """
    # Peak usage per buffer across all children (floored at 0, matching the
    # zero-initialized accumulator of the aggregation).
    peak = {}
    for usage in child_caps:
        for buf, amount in usage.items():
            peak[buf] = max(peak.get(buf, 0), amount)

    for buf, amount in peak.items():
        caps[buf] = caps.get(buf, 0) + amount
21
+
22
+
23
def compute_total(child_caps, caps):
    """Aggregate children's capacity usage by summing every child's usage
    per buffer into ``caps`` (mutated in place).

    ``child_caps`` is an iterable of {buffer: usage} dicts, one per child.
    """
    for usage in child_caps:
        for buf, amount in usage.items():
            caps[buf] = caps.get(buf, 0) + amount
30
+
31
+
32
# Maps a scheduling-construct name to the aggregator that folds children's
# buffer-capacity usages into the parent's running `caps` dict.
# "sequential" takes the per-buffer max across children (presumably because
# sequential children time-share capacity — TODO confirm), while "pipeline"
# and "parallel" sum them (presumably because those children are live
# simultaneously — TODO confirm).
CAPACITY_AGGREGATORS = {
    "sequential": compute_max,
    "pipeline": compute_total,
    "parallel": compute_total,
}