accelforge 0.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (258) hide show
  1. accelforge/__init__.py +21 -0
  2. accelforge/_accelerated_imports.py +16 -0
  3. accelforge/_deprecate/_simanneal/evalmapping.py +271 -0
  4. accelforge/_deprecate/_simanneal/mapspaceglobals.py +298 -0
  5. accelforge/_deprecate/_simanneal/simanneal.py +666 -0
  6. accelforge/_deprecate/_simanneal/tracking.py +105 -0
  7. accelforge/_deprecate/_simanneal/wrappers.py +218 -0
  8. accelforge/_deprecate/_simanneal2/__init__.py +7 -0
  9. accelforge/_deprecate/_simanneal2/simanneal.py +493 -0
  10. accelforge/_deprecate/_simanneal2/tracking.py +116 -0
  11. accelforge/_deprecate/compatibility_util.py +181 -0
  12. accelforge/_deprecate/layerdeduplication/__init__.py +2 -0
  13. accelforge/_deprecate/layerdeduplication/group_similar_einsums.py +160 -0
  14. accelforge/_deprecate/layerdeduplication/grouped_einsums.py +84 -0
  15. accelforge/_deprecate/mapping_filter_tags/__init__.py +2 -0
  16. accelforge/_deprecate/mapping_filter_tags/ffmt.py +212 -0
  17. accelforge/_deprecate/mapping_filter_tags/onesplit.py +24 -0
  18. accelforge/_deprecate/mapping_filter_tags/util.py +24 -0
  19. accelforge/_deprecate/tags.py +69 -0
  20. accelforge/_deprecate/viz/__init__.py +0 -0
  21. accelforge/_deprecate/viz/interactive.py +159 -0
  22. accelforge/_deprecate/viz/reservationtree.py +307 -0
  23. accelforge/_deprecate/viz/ski_slope.py +88 -0
  24. accelforge/_version.py +15 -0
  25. accelforge/examples.py +39 -0
  26. accelforge/frontend/__init__.py +10 -0
  27. accelforge/frontend/_binding.py +129 -0
  28. accelforge/frontend/_workload_isl/__init__.py +2 -0
  29. accelforge/frontend/_workload_isl/_isl.py +149 -0
  30. accelforge/frontend/_workload_isl/_symbolic.py +141 -0
  31. accelforge/frontend/arch copy.py +1544 -0
  32. accelforge/frontend/arch.py +1642 -0
  33. accelforge/frontend/config.py +63 -0
  34. accelforge/frontend/mapper/__init__.py +5 -0
  35. accelforge/frontend/mapper/ffm.py +126 -0
  36. accelforge/frontend/mapper/mapper.py +7 -0
  37. accelforge/frontend/mapper/metrics.py +30 -0
  38. accelforge/frontend/mapping/__init__.py +1 -0
  39. accelforge/frontend/mapping/mapping.py +1736 -0
  40. accelforge/frontend/model.py +14 -0
  41. accelforge/frontend/renames.py +150 -0
  42. accelforge/frontend/spec copy.py +230 -0
  43. accelforge/frontend/spec.py +301 -0
  44. accelforge/frontend/variables.py +12 -0
  45. accelforge/frontend/workload.py +952 -0
  46. accelforge/mapper/FFM/__init__.py +9 -0
  47. accelforge/mapper/FFM/_join_pmappings/__init__.py +0 -0
  48. accelforge/mapper/FFM/_join_pmappings/compatibility.py +653 -0
  49. accelforge/mapper/FFM/_join_pmappings/compress_pmappings.py +140 -0
  50. accelforge/mapper/FFM/_join_pmappings/join_pmappings.py +703 -0
  51. accelforge/mapper/FFM/_join_pmappings/pmapping_dataframe.py +901 -0
  52. accelforge/mapper/FFM/_join_pmappings/pmapping_group.py +337 -0
  53. accelforge/mapper/FFM/_make_pmappings/contraints/__init__.py +0 -0
  54. accelforge/mapper/FFM/_make_pmappings/contraints/constraints.py +360 -0
  55. accelforge/mapper/FFM/_make_pmappings/make_pmapping_templates/__init__.py +1 -0
  56. accelforge/mapper/FFM/_make_pmappings/make_pmapping_templates/make_loops.py +373 -0
  57. accelforge/mapper/FFM/_make_pmappings/make_pmapping_templates/make_pmapping_templates.py +463 -0
  58. accelforge/mapper/FFM/_make_pmappings/make_pmapping_templates/make_reservations.py +95 -0
  59. accelforge/mapper/FFM/_make_pmappings/make_pmapping_templates/make_storage_order.py +382 -0
  60. accelforge/mapper/FFM/_make_pmappings/make_pmapping_templates/make_storages.py +155 -0
  61. accelforge/mapper/FFM/_make_pmappings/make_pmappings.py +411 -0
  62. accelforge/mapper/FFM/_make_pmappings/make_pmappings_from_templates/__init__.py +1 -0
  63. accelforge/mapper/FFM/_make_pmappings/make_pmappings_from_templates/make_pmappings_from_templates.py +407 -0
  64. accelforge/mapper/FFM/_make_pmappings/make_pmappings_from_templates/make_tile_shapes.py +1681 -0
  65. accelforge/mapper/FFM/_make_pmappings/make_pmappings_from_templates/run_model.py +170 -0
  66. accelforge/mapper/FFM/_make_pmappings/make_pmappings_from_templates/symbol_relations.py +174 -0
  67. accelforge/mapper/FFM/_make_pmappings/pmapper_job.py +282 -0
  68. accelforge/mapper/FFM/_pareto_df/df_convention.py +273 -0
  69. accelforge/mapper/FFM/_pareto_df/pareto copy.py +836 -0
  70. accelforge/mapper/FFM/_pareto_df/pareto.py +508 -0
  71. accelforge/mapper/FFM/data.py +61 -0
  72. accelforge/mapper/FFM/main copy.py +236 -0
  73. accelforge/mapper/FFM/main.py +208 -0
  74. accelforge/mapper/FFM/mappings.py +510 -0
  75. accelforge/mapper/FFM/pmappings.py +310 -0
  76. accelforge/mapper/__init__.py +4 -0
  77. accelforge/mapper.py +0 -0
  78. accelforge/model/__init__.py +1 -0
  79. accelforge/model/_looptree/__init__.py +0 -0
  80. accelforge/model/_looptree/accesses.py +335 -0
  81. accelforge/model/_looptree/capacity/__init__.py +1 -0
  82. accelforge/model/_looptree/capacity/aggregators.py +36 -0
  83. accelforge/model/_looptree/capacity/capacity.py +47 -0
  84. accelforge/model/_looptree/energy.py +150 -0
  85. accelforge/model/_looptree/equivalent_ranks.py +29 -0
  86. accelforge/model/_looptree/latency/__init__.py +1 -0
  87. accelforge/model/_looptree/latency/latency.py +98 -0
  88. accelforge/model/_looptree/latency/memory.py +120 -0
  89. accelforge/model/_looptree/latency/processors.py +92 -0
  90. accelforge/model/_looptree/mapping_utilities.py +71 -0
  91. accelforge/model/_looptree/reuse/__init__.py +4 -0
  92. accelforge/model/_looptree/reuse/isl/__init__.py +1 -0
  93. accelforge/model/_looptree/reuse/isl/des.py +59 -0
  94. accelforge/model/_looptree/reuse/isl/isl_functions.py +374 -0
  95. accelforge/model/_looptree/reuse/isl/mapping_to_isl/__init__.py +4 -0
  96. accelforge/model/_looptree/reuse/isl/mapping_to_isl/analyze_mapping.py +297 -0
  97. accelforge/model/_looptree/reuse/isl/mapping_to_isl/skews_from_mapping.py +236 -0
  98. accelforge/model/_looptree/reuse/isl/mapping_to_isl/tiling.py +685 -0
  99. accelforge/model/_looptree/reuse/isl/mapping_to_isl/types.py +188 -0
  100. accelforge/model/_looptree/reuse/isl/spatial.py +260 -0
  101. accelforge/model/_looptree/reuse/isl/temporal.py +182 -0
  102. accelforge/model/_looptree/reuse/symbolic/__init__.py +1 -0
  103. accelforge/model/_looptree/reuse/symbolic/symbolic copy 2.py +1346 -0
  104. accelforge/model/_looptree/reuse/symbolic/symbolic copy.py +1408 -0
  105. accelforge/model/_looptree/reuse/symbolic/symbolic.py +1396 -0
  106. accelforge/model/_looptree/run.py +122 -0
  107. accelforge/model/_looptree/types.py +26 -0
  108. accelforge/model/_looptree/visualization/__init__.py +0 -0
  109. accelforge/model/_looptree/visualization/occupancy.py +11 -0
  110. accelforge/model/main.py +222 -0
  111. accelforge/plotting/__init__.py +2 -0
  112. accelforge/plotting/mappings.py +219 -0
  113. accelforge/plotting/specs.py +57 -0
  114. accelforge/util/__init__.py +4 -0
  115. accelforge/util/_base_analysis_types.py +24 -0
  116. accelforge/util/_basetypes.py +1089 -0
  117. accelforge/util/_frozenset.py +36 -0
  118. accelforge/util/_isl.py +29 -0
  119. accelforge/util/_itertools.py +14 -0
  120. accelforge/util/_mathfuncs.py +57 -0
  121. accelforge/util/_parse_expressions.py +339 -0
  122. accelforge/util/_picklecache.py +32 -0
  123. accelforge/util/_setexpressions.py +268 -0
  124. accelforge/util/_sympy/__init__.py +0 -0
  125. accelforge/util/_sympy/broadcast_max.py +18 -0
  126. accelforge/util/_visualization.py +112 -0
  127. accelforge/util/_yaml.py +579 -0
  128. accelforge/util/parallel.py +193 -0
  129. accelforge-0.0.1.dist-info/METADATA +64 -0
  130. accelforge-0.0.1.dist-info/RECORD +258 -0
  131. accelforge-0.0.1.dist-info/WHEEL +5 -0
  132. accelforge-0.0.1.dist-info/licenses/LICENSE +19 -0
  133. accelforge-0.0.1.dist-info/top_level.txt +5 -0
  134. docs/_build/html/_sources/fastfusion.frontend.mapper.rst.txt +37 -0
  135. docs/_build/html/_sources/fastfusion.frontend.rst.txt +70 -0
  136. docs/_build/html/_sources/fastfusion.frontend.workload.rst.txt +21 -0
  137. docs/_build/html/_sources/fastfusion.mapper.FFM.rst.txt +37 -0
  138. docs/_build/html/_sources/fastfusion.mapper.rst.txt +18 -0
  139. docs/_build/html/_sources/fastfusion.rst.txt +20 -0
  140. docs/_build/html/_sources/fastfusion.util.rst.txt +21 -0
  141. docs/_build/html/_sources/index.rst.txt +87 -0
  142. docs/_build/html/_sources/modules.rst.txt +7 -0
  143. docs/_build/html/_sources/notes/citation.rst.txt +45 -0
  144. docs/_build/html/_sources/notes/definitions.rst.txt +43 -0
  145. docs/_build/html/_sources/notes/faqs.rst.txt +39 -0
  146. docs/_build/html/_sources/notes/modeling/accelerator_energy_latency.rst.txt +72 -0
  147. docs/_build/html/_sources/notes/modeling/component_energy_area.rst.txt +96 -0
  148. docs/_build/html/_sources/notes/modeling/mapping.rst.txt +100 -0
  149. docs/_build/html/_sources/notes/modeling.rst.txt +33 -0
  150. docs/_build/html/_sources/notes/parsing/arithmetic_parsing.rst.txt +136 -0
  151. docs/_build/html/_sources/notes/parsing/setexpressions.rst.txt +63 -0
  152. docs/_build/html/_sources/notes/parsing/yaml_parsing.rst.txt +176 -0
  153. docs/_build/html/_sources/notes/quickstart_and_installation.rst.txt +9 -0
  154. docs/_build/html/_sources/notes/spec/architecture.rst.txt +133 -0
  155. docs/_build/html/_sources/notes/spec/mapping.rst.txt +12 -0
  156. docs/_build/html/_sources/notes/spec/workload.rst.txt +83 -0
  157. docs/_build/html/_sources/notes/spec.rst.txt +36 -0
  158. docs/source/_ext/include_attrs.py +213 -0
  159. docs/source/_ext/include_docstring.py +364 -0
  160. docs/source/_ext/include_functions.py +154 -0
  161. docs/source/_ext/include_notebook.py +131 -0
  162. docs/source/_ext/include_yaml.py +119 -0
  163. docs/source/_ext/inherited_attributes.py +222 -0
  164. docs/source/_ext/paths.py +4 -0
  165. docs/source/conf.py +79 -0
  166. examples/arches/compute_in_memory/_include.yaml +74 -0
  167. examples/arches/compute_in_memory/_include_functions.py +229 -0
  168. examples/arches/compute_in_memory/_load_spec.py +57 -0
  169. examples/arches/compute_in_memory/components/c2c_multiplier.py +181 -0
  170. examples/arches/compute_in_memory/components/dac_c2c_r2r.py +605 -0
  171. examples/arches/compute_in_memory/components/misc.py +195 -0
  172. examples/arches/compute_in_memory/components/util/bit_functions.py +51 -0
  173. examples/arches/compute_in_memory/components/zero_comparator.py +92 -0
  174. examples/arches/compute_in_memory/isaac.yaml +233 -0
  175. examples/arches/compute_in_memory/memory_cells/ecram_demo.yaml +63 -0
  176. examples/arches/compute_in_memory/memory_cells/rram_example.yaml +63 -0
  177. examples/arches/compute_in_memory/memory_cells/rram_isaac_isca_2016.yaml +64 -0
  178. examples/arches/compute_in_memory/memory_cells/rram_neurosim_default.yaml +63 -0
  179. examples/arches/compute_in_memory/memory_cells/rram_raella_isca_2023.yaml +70 -0
  180. examples/arches/compute_in_memory/memory_cells/rram_wan_nature_2022.yaml +63 -0
  181. examples/arches/compute_in_memory/memory_cells/sram_colonnade_jssc_2021.yaml +63 -0
  182. examples/arches/compute_in_memory/memory_cells/sram_example.yaml +63 -0
  183. examples/arches/compute_in_memory/memory_cells/sram_jia_jssc_2020.yaml +63 -0
  184. examples/arches/compute_in_memory/memory_cells/sram_sinangil_jssc_2021.yaml +63 -0
  185. examples/arches/compute_in_memory/memory_cells/sram_wang_vlsi_2022.yaml +63 -0
  186. examples/arches/compute_in_memory/wang_vlsi_2022.yaml +289 -0
  187. examples/arches/eyeriss.yaml +68 -0
  188. examples/arches/fanout_variations/at_glb.yaml +31 -0
  189. examples/arches/fanout_variations/at_glb_with_fanout_node.yaml +34 -0
  190. examples/arches/fanout_variations/at_mac.yaml +31 -0
  191. examples/arches/fanout_variations/at_mac_with_constraints.yaml +38 -0
  192. examples/arches/fanout_variations/at_mac_with_fanout_node.yaml +34 -0
  193. examples/arches/nvdla.yaml +47 -0
  194. examples/arches/simple.yaml +28 -0
  195. examples/arches/tpu_v4i.yaml +67 -0
  196. examples/mappings/unfused_matmuls_to_simple.yaml +33 -0
  197. examples/misc/component_annotated.yaml +33 -0
  198. examples/workloads/gpt3_6.7B.yaml +124 -0
  199. examples/workloads/matmuls.yaml +20 -0
  200. examples/workloads/mobilenet_28.yaml +81 -0
  201. examples/workloads/mobilenet_various_separate.yaml +106 -0
  202. examples/workloads/three_matmuls_annotated.yaml +59 -0
  203. notebooks/.ipynb_checkpoints/fastfusion_arch_study_michael-checkpoint.ipynb +359 -0
  204. notebooks/compute_in_memory/_scripts.py +339 -0
  205. notebooks/compute_in_memory/isaac.guide.ipynb +270 -0
  206. notebooks/compute_in_memory/wang_vlsi_2022.ipynb +602 -0
  207. notebooks/paths.py +4 -0
  208. notebooks/tutorials/.ipynb_checkpoints/1_FFM-checkpoint.ipynb +3110 -0
  209. notebooks/tutorials/FFM.ipynb +3498 -0
  210. notebooks/tutorials/_include.py +48 -0
  211. notebooks/tutorials/component_energy_area.ipynb +363 -0
  212. tests/Q_mapping.yaml +38 -0
  213. tests/__init__.py +0 -0
  214. tests/conv.mapping.yaml +27 -0
  215. tests/conv.workload.yaml +13 -0
  216. tests/conv_sym.mapping.yaml +43 -0
  217. tests/copy.mapping.yaml +35 -0
  218. tests/copy.workload.yaml +15 -0
  219. tests/distribuffers/__init__.py +0 -0
  220. tests/distribuffers/multicast/test_cases.yaml +482 -0
  221. tests/distribuffers/spec/binding/valid_bindings.yaml +97 -0
  222. tests/distribuffers/spec/distributed.yaml +100 -0
  223. tests/distribuffers/spec/logical_arch.yaml +32 -0
  224. tests/distribuffers/spec/physical_arch.yaml +69 -0
  225. tests/distribuffers/test_binding.py +48 -0
  226. tests/frontend/__init__.py +0 -0
  227. tests/frontend/test_mapping_viz.py +52 -0
  228. tests/mapper/__init__.py +0 -0
  229. tests/mapper/configs/conv1d/conv1d.mapping.yaml +31 -0
  230. tests/mapper/configs/conv1d/conv1d.workload.yaml +11 -0
  231. tests/mapper/configs/two_conv1d/two_conv1d.expected.yaml +38 -0
  232. tests/mapper/configs/two_conv1d/two_conv1d.mapping.yaml +54 -0
  233. tests/mapper/configs/two_conv1d/two_conv1d.workload.yaml +19 -0
  234. tests/mapper/test_mapping_to_isl.py +90 -0
  235. tests/mapper/test_spatial_reuse_analysis.py +67 -0
  236. tests/mapper/test_temporal_reuse_analysis.py +56 -0
  237. tests/mapper/util.py +58 -0
  238. tests/matmul.mapping.yaml +29 -0
  239. tests/matmul.workload.yaml +12 -0
  240. tests/matmul_spatial.mapping.yaml +44 -0
  241. tests/mha.renames.yaml +65 -0
  242. tests/mha.workload.yaml +67 -0
  243. tests/mha.yaml +59 -0
  244. tests/mha_full.workload.yaml +67 -0
  245. tests/mobilenet.workload.yaml +35 -0
  246. tests/mobilenet_long.workload.yaml +64 -0
  247. tests/pmappingcache.py +24 -0
  248. tests/processing_stage.arch.yaml +40 -0
  249. tests/snowcat.arch.yaml +36 -0
  250. tests/test_ffm_join_pmappings.py +106 -0
  251. tests/test_ffm_make_pmappings.py +82 -0
  252. tests/test_ffm_make_tile_shapes.py +49 -0
  253. tests/test_mapper.py +100 -0
  254. tests/test_model.py +37 -0
  255. tests/test_plotting.py +72 -0
  256. tests/test_processing_stage.py +46 -0
  257. tests/test_symbolic_model.py +248 -0
  258. tests/test_workload.py +141 -0
@@ -0,0 +1,47 @@
1
+ from .aggregators import CAPACITY_AGGREGATORS
2
+
3
+
4
def get_value_from_singular_qpolynomial(qp):
    """Evaluate a quasi-polynomial whose domain contains a single point.

    Samples a point from the domain of ``qp`` and evaluates the
    quasi-polynomial there; for a singular (single-point) domain this
    yields the unique value of ``qp``.
    """
    point = qp.domain().sample_point()
    return qp.eval(point)
6
+
7
+
8
def compute_capacity_usage(mapping, occupancy, workload):
    """Compute per-buffer capacity usage for a (possibly branching) mapping.

    Parameters
    ----------
    mapping:
        Iterable of mapping nodes (dicts with a "type" key). "storage"
        nodes contribute tensor occupancies; "sequential"/"parallel"/
        "pipeline" nodes recurse into their branches and combine the
        children's capacities via CAPACITY_AGGREGATORS.
    occupancy:
        Maps (buffer, tensor_id, einsum_id) to a pair whose second element
        is a singular quasi-polynomial giving the occupancy.
    workload:
        Provides name->id translation for tensors and Einsums.

    Returns
    -------
    dict mapping buffer name to its total capacity usage.
    """
    caps = {}
    tensor_name_to_id = workload.data_space_name_to_id()
    einsum_name_to_id = workload.einsum_name_to_id()

    # BUG FIX (perf): get_einsums(mapping) depends only on `mapping`, not on
    # the current node, so hoist it out of the loop instead of recomputing
    # it once per node.
    einsums = get_einsums(mapping)

    for node in mapping:
        if node["type"] == "storage":
            buf = node["target"]
            caps.setdefault(buf, 0)

            for tensor in node["dspace"]:
                tensor_id = tensor_name_to_id[tensor]
                # A tensor may occupy the buffer during several Einsums;
                # charge its worst-case footprint across them.
                max_cap = 0
                for einsum in einsums:
                    einsum_id = einsum_name_to_id[einsum]
                    key = (buf, tensor_id, einsum_id)
                    if key in occupancy:
                        max_cap = max(
                            max_cap,
                            get_value_from_singular_qpolynomial(occupancy[key][1]),
                        )
                caps[buf] += max_cap

        elif node["type"] in ["sequential", "parallel", "pipeline"]:
            aggregate_capacity = CAPACITY_AGGREGATORS[node["type"]]
            child_caps = [
                compute_capacity_usage(b, occupancy, workload)
                for b in node["branches"]
            ]
            aggregate_capacity(child_caps, caps)
    return caps
40
+
41
+
42
def get_einsums(mapping):
    """Collect the Einsum names computed under a mapping, in branch order.

    Walks the node list until it hits either a branching node (whose
    branches are searched recursively and concatenated) or a compute node.

    BUG FIX: returns [] when the mapping contains neither kind of node,
    instead of falling through and returning None implicitly — None would
    break the ``sum(..., start=[])`` concatenation in recursive calls.
    """
    for node in mapping:
        if node["type"] in ["sequential", "parallel", "pipeline"]:
            return sum((get_einsums(b) for b in node["branches"]), start=[])
        elif node["type"] == "compute":
            return [node["einsum"]]
    return []
@@ -0,0 +1,150 @@
1
+ from collections import defaultdict
2
+ from collections.abc import Mapping as MappingABC
3
+ from dataclasses import dataclass
4
+ import logging
5
+ from numbers import Number
6
+ from numbers import Real
7
+
8
+ from accelforge.frontend import arch
9
+ from accelforge.frontend.mapping.mapping import MappingNode
10
+ from accelforge.frontend.spec import Spec
11
+ from accelforge.model._looptree.reuse.symbolic import SymbolicAnalysisOutput
12
+ from accelforge.util._base_analysis_types import (
13
+ ActionCount,
14
+ ActionKey,
15
+ VerboseActionKey,
16
+ )
17
+ from accelforge.frontend.workload import Workload
18
+ from accelforge.frontend.mapping import Mapping
19
+
20
+
21
def gather_actions(
    looptree_results: SymbolicAnalysisOutput,
    bindings: dict[str, str],
    verbose: bool = False,
    use_name=False,
):
    """Aggregate read/write/compute action counts from loop-tree results.

    Parameters
    ----------
    looptree_results:
        Symbolic analysis output carrying ``buffet_stats`` (memory access
        statistics) and ``compute_stats`` (operation counts).
    bindings:
        Maps logical level names to bound component names; used by the
        keyers when ``use_name`` is False.
    verbose:
        When True, keys additionally carry tensor/Einsum information.
    use_name:
        When True, level names are used directly instead of being
        translated through ``bindings``.

    Returns
    -------
    dict mapping action keys to accumulated ActionCount totals.
    """
    actions: dict[tuple[str, str], ActionCount] = {}
    compute_levels = set(c.level for c in looptree_results.compute_stats)

    buffet_keyer = _get_buffet_keyer(verbose, use_name, bindings)
    compute_keyer = _get_compute_keyer(verbose, use_name, bindings)

    for buffet, accesses in looptree_results.buffet_stats.items():
        # Compute levels are accounted for via compute_stats below.
        if buffet.level in compute_levels:
            continue

        # BUG FIX (dead code): the old body computed a local `level`
        # (including a no-op `level = level`) that was never used; the
        # keyers perform the bindings translation themselves.

        key = buffet_keyer(buffet, "read")
        if key not in actions:
            actions[key] = ActionCount.default()
        actions[key].total += accesses.net_total_read_actions()
        actions[key].max_per_unit += accesses.net_max_per_unit_read_actions()

        key = buffet_keyer(buffet, "write")
        if key not in actions:
            actions[key] = ActionCount.default()
        actions[key].total += accesses.net_total_write_actions()
        actions[key].max_per_unit += accesses.net_max_per_unit_write_actions()

    for compute, ops in looptree_results.compute_stats.items():
        key = compute_keyer(compute, "compute")
        if key not in actions:
            actions[key] = ActionCount.default()
        actions[key].total += ops.total_ops
        actions[key].max_per_unit += ops.max_per_unit_ops

    return actions
64
+
65
+
66
def _get_buffet_keyer(verbose, use_name, bindings):
    """Return a function mapping (buffet, action_name) to an action key.

    When ``use_name`` is set, the buffet's level name is used directly;
    otherwise the level is translated through ``bindings``. Verbose keys
    additionally carry the buffet's tensor and Einsum.

    (Cleanup: replaced the no-op ``if use_name: level = level`` branch
    with a single negated condition.)
    """
    if not verbose:

        def get_buffet_key(buffet, action_name) -> ActionKey:
            level = buffet.level
            if not use_name:
                level = bindings[level]
            return ActionKey(level, action_name)

    else:

        def get_buffet_key(buffet, action_name) -> VerboseActionKey:
            level = buffet.level
            if not use_name:
                level = bindings[level]
            return VerboseActionKey(level, action_name, buffet.tensor, buffet.einsum)

    return get_buffet_key
88
+
89
+
90
def _get_compute_keyer(verbose, use_name, bindings):
    """Return a function mapping (compute, action_name) to an action key.

    Mirrors _get_buffet_keyer; verbose keys carry the Einsum but no tensor
    (compute units are not tensor-specific, hence the ``None``).

    (Cleanup: replaced the no-op ``if use_name: level = level`` branch
    with a single negated condition.)
    """
    if not verbose:

        def compute_keyer(compute, action_name):
            level = compute.level
            if not use_name:
                level = bindings[level]
            return ActionKey(level, action_name)

    else:

        def compute_keyer(compute, action_name):
            level = compute.level
            if not use_name:
                level = bindings[level]
            return VerboseActionKey(level, action_name, None, compute.einsum)

    return compute_keyer
112
+
113
+
114
def compute_energy_from_actions(
    spec: Spec,
    action_counts: MappingABC[ActionKey, Real],
    overall_latency: float,
    component_to_non_power_gated_porp: dict[str, int] = None,
) -> dict[ActionKey | VerboseActionKey, Number]:
    """Translate action counts into per-(component, action) energy.

    Dynamic energy is ``count * per-action energy`` taken from the
    architecture spec. Leakage energy is added for every component as
    ``leak_power * overall_latency``, scaled by the component's
    non-power-gated proportion (default 1, i.e. never power-gated).

    Raises
    ------
    KeyError
        If an action in ``action_counts`` has no energy entry on its
        component.
    """
    if component_to_non_power_gated_porp is None:
        logging.warning(
            "No component_to_non_power_gated_porp provided, will not account for power gating."
        )
        component_to_non_power_gated_porp = {}

    energy_result = {}
    components = {}  # cache: level name -> component object from the arch
    for key, counts in action_counts.items():
        if counts.total == 0:
            continue
        if key.level not in components:
            components[key.level] = spec.arch.find(key.level)
        component_obj = components[key.level]
        try:
            energy_per_ac = component_obj.actions[key.action].energy
        except KeyError:
            # BUG FIX: the old message read `key.component`, but action keys
            # expose `level` (used throughout this function), so formatting
            # the message raised AttributeError instead of this KeyError.
            raise KeyError(
                f"Action {key.action} not found in component {key.level}. Action occurred "
                f"{counts.total} times."
            ) from None
        energy_result[key] = counts.total * energy_per_ac

    # Leakage accrues for the whole runtime regardless of activity.
    for component_obj in spec.arch.get_nodes_of_type(arch.Component):
        energy_result[ActionKey(component_obj.name, "leak")] = (
            component_obj.total_leak_power
            * overall_latency
            * component_to_non_power_gated_porp.get(component_obj.name, 1)
        )

    return energy_result
@@ -0,0 +1,29 @@
1
+ # from bindings.looptree import LooptreeWorkload, LooptreeWorkloadDependencyAnalyzer
2
+
3
+
4
class EquivalentGroups:
    """Groups of workload ranks that are equivalent across Einsums.

    Attributes
    ----------
    group_id_to_ranks:
        Maps a group id to the frozenset of rank ids it contains.
    rank_to_group_id:
        Inverse lookup from each rank id to its group id.
    """

    def __init__(self):
        self.group_id_to_ranks = {}
        self.rank_to_group_id = {}

    @staticmethod
    def from_workload(
        workload: "LooptreeWorkload", analyzer: "LooptreeWorkloadDependencyAnalyzer"
    ):
        """Build the equivalence groups over every rank of every Einsum."""
        groups = EquivalentGroups()
        known_groups = set()

        for einsum_id in workload.einsum_id_to_name():
            for rank_id in workload.einsum_ospace_dimensions(einsum_id):
                members = frozenset(
                    analyzer.equivalent_dimensions(einsum_id, rank_id)
                )
                if members in known_groups:
                    continue
                known_groups.add(members)
                new_id = len(groups.group_id_to_ranks)
                groups.group_id_to_ranks[new_id] = members
                for rank in members:
                    groups.rank_to_group_id[rank] = new_id

        return groups
@@ -0,0 +1 @@
1
+ from .latency import get_latency
@@ -0,0 +1,98 @@
1
+ from typing import overload
2
+ from sympy import Piecewise
3
+
4
+ # from accelforge.model._looptree._isl.singular import get_value_from_singular_qpolynomial
5
+ from accelforge.frontend.arch import Compute
6
+ from accelforge.model._looptree.latency.processors import LATENCY_PROCESSORS
7
+ from accelforge.model._looptree.reuse.isl import IslReuseAnalysisOutput
8
+ from accelforge.model._looptree.reuse import SymbolicAnalysisOutput
9
+
10
+ from accelforge.util._sympy.broadcast_max import Max
11
+
12
+ # from bindings.looptree import SpatialTag
13
+
14
+
15
def get_latency(looptree_results, mapping, workload, flattened_arch):
    """Return (overall, compute, per-memory) latency for a mapping.

    Overall latency is the maximum of the compute latency and each memory
    latency, i.e. compute and memory transfers are assumed to fully
    overlap.
    """
    compute_part = calculate_compute_latency(looptree_results, mapping, workload)
    memory_parts = memory_latency(looptree_results, flattened_arch, mapping, workload)

    total = Max(compute_part, *memory_parts.values())
    return total, compute_part, memory_parts
21
+
22
+
23
@overload
def calculate_compute_latency(
    reuse_analysis_results: IslReuseAnalysisOutput, mapping, workload
): ...


@overload
def calculate_compute_latency(
    reuse_analysis_results: SymbolicAnalysisOutput, mapping, workload
): ...


def calculate_compute_latency(reuse_analysis_results, mapping, workload):
    """Dispatch compute-latency calculation on the analysis result type.

    Raises
    ------
    TypeError
        If ``reuse_analysis_results`` is neither an ISL nor a symbolic
        analysis output. (BUG FIX: the old code fell off the end and
        silently returned None in that case.)
    """
    if isinstance(reuse_analysis_results, IslReuseAnalysisOutput):
        return compute_isl_latency(
            reuse_analysis_results.temporal_steps, mapping, workload
        )
    if isinstance(reuse_analysis_results, SymbolicAnalysisOutput):
        return compute_summarized_latency(
            reuse_analysis_results.compute_stats, mapping, workload
        )
    raise TypeError(
        f"Unsupported reuse analysis result type: {type(reuse_analysis_results)}"
    )
46
+
47
+
48
def compute_summarized_latency(compute_stats, mapping, workload):
    """Return the largest ``max_latency`` across all compute statistics.

    Returns 0 when ``compute_stats`` is empty. Latencies may be symbolic,
    so the running maximum is built with the broadcast-aware ``Max``.
    """
    # TODO: this is only for single-Einsum!!!
    longest = 0
    for unit_stats in compute_stats.values():
        latency = unit_stats.max_latency
        longest = latency if longest == 0 else Max(longest, latency)
    return longest
57
+
58
+
59
def compute_isl_latency(temporal_steps, mapping, workload):
    # Disabled: ISL-based latency computation is not currently supported.
    raise NotImplementedError()
    # NOTE(review): everything below is dead code kept for reference.
    # `get_value_from_singular_qpolynomial` is not imported here (its
    # import is commented out at the top of this module), so restoring
    # this path also requires restoring that import.
    return get_value_from_singular_qpolynomial(
        _compute_latency(mapping.nodes, 0, temporal_steps, workload)[1]
    ).to_python()
64
+
65
+
66
def _compute_latency(mapping, top_idx: int, temporal_steps, workload):
    # Disabled along with compute_isl_latency; the body below is dead code
    # retained for reference.
    raise NotImplementedError()
    einsum_name_to_id = workload.einsum_name_to_id()

    next_top_idx = top_idx
    for node in mapping:
        next_top_idx += 1

        # Branching nodes: recurse into children, then combine with the
        # processor registered for the node type.
        if node["type"] in LATENCY_PROCESSORS.keys():
            children_latencies = [
                _compute_latency(branch, next_top_idx, temporal_steps, workload)
                for branch in node["branches"]
            ]

            return LATENCY_PROCESSORS[node["type"]](top_idx, children_latencies)
        elif node["type"] == "compute":
            einsum = node["einsum"]
            # Incomplete compute nodes contribute no latency.
            if "incomplete" in node and node["incomplete"]:
                return ([], 0)
            einsum_id = einsum_name_to_id[einsum]
            return temporal_steps[einsum_id]
87
+
88
+
89
def ops_to_latency(dims, map):
    # Disabled: depends on SpatialTag, whose import is commented out at the
    # top of this module.
    raise NotImplementedError()
    # NOTE(review): dead code below. `mask` and `new_dims` are built but
    # never used by the final expression, which suggests this was work in
    # progress when it was disabled.
    mask = [False] * len(dims)
    new_dims = []
    for i, d in enumerate(dims):
        if d == SpatialTag:
            mask[i] = True
        else:
            new_dims.append(d)
    return map.domain().identity().card()
@@ -0,0 +1,120 @@
1
+ from collections import defaultdict
2
+
3
+ from accelforge.frontend import arch
4
+ from accelforge.frontend.arch import Leaf, Memory, TensorHolder, Component
5
+ from accelforge.frontend.mapping import Compute, Mapping
6
+ from accelforge.frontend.spec import Spec
7
+
8
+ from accelforge.model._looptree.accesses import isl_buffer_accesses_from_buffet_actions
9
+ from accelforge.model._looptree.mapping_utilities import get_leaves
10
+ from accelforge.model._looptree.reuse.isl import IslReuseAnalysisOutput
11
+ from accelforge.model._looptree.reuse import SymbolicAnalysisOutput
12
+ from accelforge.model._looptree.types import Buffet
13
+
14
+ from accelforge.model._looptree.reuse.symbolic import BuffetStats
15
+ from accelforge.util._parse_expressions import MATH_FUNCS, parse_expression
16
+ from accelforge.util._sympy.broadcast_max import Max, Min
17
+ import sympy as sp
18
+
19
+
20
def isl_to_summarized(
    looptree_results: IslReuseAnalysisOutput, mapping, workload
) -> SymbolicAnalysisOutput:
    """Convert ISL reuse-analysis output into the symbolic summary format.

    Per-buffet access statistics are extracted from the ISL results and
    repackaged as BuffetStats keyed by (level, tensor, einsum) Buffets.
    """
    per_buffet_accesses = isl_buffer_accesses_from_buffet_actions(
        looptree_results, mapping, workload, is_path=False
    )

    stats = {}
    for (level, tensor, einsum), accesses in per_buffet_accesses.items():
        buffet = Buffet(level=level, tensor=tensor, einsum=einsum)
        stats[buffet] = BuffetStats(
            max_per_unit_read_actions=accesses.max_per_unit_reads,
            max_per_unit_write_actions=accesses.max_per_unit_writes,
        )

    return SymbolicAnalysisOutput(buffet_stats=stats)
34
+
35
+
36
def component_latency(
    looptree_results: SymbolicAnalysisOutput,
    flattened_arch: list[Leaf],
    mapping: Mapping,
    spec: Spec,
):
    """Compute per-component latency from per-buffet action statistics.

    Tallies read/write action counts for each tensor-holding component and
    a compute action count for the compute unit, converts each action count
    into a latency via the component's per-action latency, and finally
    evaluates each component's ``total_latency`` expression (when present)
    against a symbol table of spec variables, action counts, and action
    latencies.

    Returns a dict mapping component name to its parsed latency expression;
    components without a ``total_latency`` are omitted.
    """
    # component name -> {action count name -> count}; missing entries
    # default to 0 so counts can be accumulated with +=.
    component_to_actions: dict[str, dict[str, float]] = defaultdict(
        lambda: defaultdict(lambda: 0)
    )
    name2component: dict[str, Component] = {node.name: node for node in flattened_arch}

    # The flattened architecture is expected to end with the compute unit.
    compute_obj = flattened_arch[-1]
    if not isinstance(compute_obj, arch.Compute):
        raise ValueError("Last node in flattened_arch must be a Compute")

    for buffet, buffet_stats in looptree_results.buffet_stats.items():
        component = buffet.level
        actions = component_to_actions[component]
        if component not in name2component:
            raise ValueError(f"Component {component} found in mapping but not arch")

        # Pre-register every declared action with a zero count so the
        # latency pass below sees all of them even if unused.
        # NOTE(review): this iterates `.actions` and reads `.name` on each
        # element, while the subscript `component_obj.actions[action_name]`
        # below looks up by name — presumably `.actions` is a mapping-like
        # container that iterates its action objects; confirm.
        for action in name2component[component].actions:
            actions[f"{action.name}_actions"] += 0

        if isinstance(name2component[component], TensorHolder):
            # Net reads: worst-case per-unit reads minus reads skipped on
            # the first pass (per the buffet statistics).
            actions["read_actions"] += (
                buffet_stats.max_per_unit_read_actions
                - buffet_stats.min_per_unit_skipped_first_read_actions
            )
            # Processing stages do not count write actions.
            if not isinstance(name2component[component], arch.ProcessingStage):
                actions["write_actions"] += (
                    buffet_stats.max_per_unit_write_actions
                    - buffet_stats.min_per_unit_skipped_first_write_actions
                )
        elif isinstance(name2component[component], arch.Compute):
            pass
        else:
            raise NotImplementedError(
                f"Component {component} is not a TensorHolder or Compute"
            )

    # The compute unit's action count is the longest per-unit compute
    # latency across all Einsums (symbolic Max; 0 if there are none).
    longest_compute_latency = Max(
        0, *[s.max_latency for s in looptree_results.compute_stats.values()]
    )
    component_to_actions[compute_obj.name]["compute_actions"] = longest_compute_latency

    # TODO: Unhardcode "compute" name"
    # Convert "<action>_actions" counts to "<action>_latency" values using
    # each component's per-action latency.
    component_to_action_latency = defaultdict(dict)
    for component, actions in component_to_actions.items():
        component_obj = name2component[component]
        for action, count in actions.items():
            # Strip the trailing "_actions" suffix to recover the action name.
            action_name = action.rsplit("_", 1)[0]
            latency = component_obj.actions[action_name].latency
            component_to_action_latency[component][f"{action_name}_latency"] = (
                latency * count
            )

    component_latency = {}

    # Base symbol table shared by every component's expression: spec
    # variables plus symbolic max/min/sum helpers.
    symbol_table_base = {
        **dict(spec.variables),
        "variables": spec.variables,
        "max": Max,
        "min": Min,
        "sum": sp.Add,
    }

    for component, actions in component_to_actions.items():
        component_obj = name2component[component]
        # Later entries shadow earlier ones: component attributes override
        # spec variables, and action counts/latencies override both.
        symbol_table = {
            "action2latency": component_to_action_latency[component],
            **symbol_table_base,
            **dict(name2component[component]),
            **actions,
            **component_to_action_latency[component],
        }
        if name2component[component].total_latency is not None:
            component_latency[component] = parse_expression(
                name2component[component].total_latency,
                symbol_table,
                attr_name="latency",
                location=component,
            )

    return component_latency
@@ -0,0 +1,92 @@
1
+ import pprint
2
+
3
+ import islpy as isl
4
+
5
+ # from bindings.looptree import PipelineSpatialTag
6
+ # from pytimeloop._isl.sum import sum_until_idx, make_reduction_map
7
+ # from pytimeloop._isl.qpolynomial import from_pw_qpolynomial_fold
8
+
9
+
10
def process_sequential_latency(top_idx: int, latencies):
    """
    Sum per-stage latencies as if all stages ran back-to-back.

    :param top_idx: Index into the dim-tag list; tags above this index are
        shared by all entries, and each latency is summed over the
        dimensions at or below it.
    :param latencies: Sequence of ``(dim_tags, latency)`` pairs, where
        ``latency`` is an ISL piecewise quasi-polynomial.

    :returns: ``(common_dim_tags, total_sequential_latency)`` — the shared
        outer dim tags and the summed latency.
    """
    # All entries are assumed to share the outer dim tags up to top_idx,
    # so the first entry's prefix is representative.
    common_dim_tags = latencies[0][0][:top_idx]
    try:
        total_sequential_latency = sum(
            sum_until_idx(top_idx, latency) for _dim_tags, latency in latencies
        )
    # Was a bare `except:`, which also intercepts KeyboardInterrupt /
    # SystemExit; narrowed to Exception. The input is dumped for
    # debugging before re-raising.
    except Exception:
        print("Bad input:")
        pprint.pp(latencies)
        raise
    return common_dim_tags, total_sequential_latency
21
+
22
+
23
def process_pipeline_latency(top_idx: int, latencies):
    """
    Compute pipelined latency: the sequential latency minus the latency
    hidden by overlapping pipeline stages.

    :param top_idx: Index into the dim-tag list up to which tags are
        shared by all entries.
    :param latencies: Sequence of ``(dim_tags, latency)`` pairs, where
        ``latency`` is an ISL piecewise quasi-polynomial.

    :returns: ``(common_dim_tags, pipeline_latency)``.
    """
    # Baseline: total latency if every stage ran back-to-back.
    sequential_latency = process_sequential_latency(top_idx, latencies)[1]

    all_dim_tags = latencies[0][0]
    dim_tags = all_dim_tags[:]
    # Locate the first pipeline-spatial dimension.
    # NOTE(review): PipelineSpatialTag comes from an import that is
    # commented out at the top of this module — as written this raises
    # NameError; confirm the intended import. Also, if dim_tags is empty
    # the loop never runs and pipeline_idx below is unbound.
    for pipeline_idx in range(len(dim_tags)):
        if isinstance(dim_tags[pipeline_idx], PipelineSpatialTag):
            break

    try:
        # Keep dimensions up to and including the pipeline dimension,
        # summing each latency over everything below it.
        dim_tags = dim_tags[: pipeline_idx + 1]
        summed_latency = sum(
            sum_until_idx(pipeline_idx + 1, latency) for tags, latency in latencies
        )
    except:
        # Dump the offending input before re-raising to aid debugging.
        print("Bad input:")
        pprint.pp(latencies)
        raise

    # Map each (time, stage) point to the later points whose latency is
    # hidden by pipeline overlap, then pull the summed latency through it.
    space = summed_latency.get_domain_space()
    hidden_latency_map = make_hidden_latency_map(dim_tags, space, len(latencies))
    hidden_latencies = hidden_latency_map.apply_pw_qpolynomial(summed_latency)

    # Reduce over the innermost (stage) dimension, taking the minimum
    # hidden latency per time step; `is_tight` from the fold is discarded.
    reduction_map = make_reduction_map(space, len(dim_tags) - 1, 1)
    reduction_map = reduction_map.intersect_range(summed_latency.domain()).coalesce()
    hidden_latencies, is_tight = reduction_map.apply_pw_qpolynomial_fold(
        isl.PwQPolynomialFold.from_pw_qpolynomial(isl.fold.min, hidden_latencies)
    )
    hidden_latencies = from_pw_qpolynomial_fold(hidden_latencies)

    # Remove last one: the lexicographically last point has no successor
    # to hide latency behind.
    domain = hidden_latencies.domain()
    hidden_latencies = hidden_latencies.subtract_domain(domain.lexmax())

    hidden_latency = sum_until_idx(top_idx, hidden_latencies)

    return all_dim_tags[:top_idx], sequential_latency - hidden_latency
60
+
61
+
62
# Dispatch table: scheduling-strategy name -> function computing its
# latency from (dim_tags, latency) pairs. Both callables share the
# (top_idx, latencies) signature and return (common_dim_tags, latency).
LATENCY_PROCESSORS = {
    "sequential": process_sequential_latency,
    "pipeline": process_pipeline_latency,
}
66
+
67
+
68
def make_hidden_latency_map(dim_tags, space, n_stages):
    """
    Build an ISL map relating each (time, pipeline-stage) point to the
    later points whose latency is hidden by pipeline overlap.

    space: [..., t, ps]
    returns: [..., t, ps] -> [..., t', ps'] : P*t+ps+1 <= P*t'+ps' < P*t+ps+P

    :param dim_tags: Dim tags for the space; the last two dimensions are
        taken to be time (t) and pipeline stage (ps).
    :param space: ISL set space the affine expressions are built on.
    :param n_stages: Number of pipeline stages (P in the formula above).
    """
    # Need at least the (t, ps) pair.
    assert len(dim_tags) >= 2

    t_idx = len(dim_tags) - 2
    ps_idx = len(dim_tags) - 1

    # NOTE(review): despite the names, these affs are defined on the set
    # space; le_map/gt_map below compare an aff evaluated on the map's
    # domain against one evaluated on its range, so `inner` plays the
    # primed (range-side) role — confirm against islpy docs.
    tprime = isl.Aff.var_on_domain(space, isl.dim_type.set, t_idx)
    ps_prime = isl.Aff.var_on_domain(space, isl.dim_type.set, ps_idx)
    # Linearized pipeline step: P*t + ps.
    inner = n_stages * tprime + ps_prime

    # Bounds of the hidden window: strictly after this step, within the
    # next P-1 steps.
    lower = n_stages * tprime + ps_prime + 1
    upper = n_stages * tprime + ps_prime + n_stages

    hidden_latency_map = lower.le_map(inner).intersect(upper.gt_map(inner))

    # Make other dimensions equal
    for i in range(t_idx):
        var = isl.Aff.var_on_domain(space, isl.dim_type.set, i)
        hidden_latency_map = hidden_latency_map.intersect(var.eq_map(var))

    return hidden_latency_map
@@ -0,0 +1,71 @@
1
+ from typing import Generator, List, Tuple
2
+
3
+ from accelforge.frontend.mapping import (
4
+ Compute,
5
+ Mapping,
6
+ MappingNode,
7
+ Pipeline,
8
+ Sequential,
9
+ )
10
+ from accelforge.frontend.workload import Workload
11
+
12
+
13
+ def get_paths(root: Mapping) -> Generator[Tuple[MappingNode, Compute], None, None]:
14
+ """
15
+ Given a MappingNode, get the paths to all all leaves in post-order.
16
+
17
+ :param root: The root of the child exploration.
18
+
19
+ :type root: MappingNode
20
+
21
+ :returns: A generator of all the MappingNodes to a Compute leaf.
22
+ :rtype: Generator[List[MappingNode]]
23
+ """
24
+ cur_path: List[MappingNode] = []
25
+ for node in root.nodes:
26
+ cur_path.append(node)
27
+ match node:
28
+ # Pipelines or sequentials should have their paths expanded.
29
+ # Mappings naturally get expanded.
30
+ case Mapping() | Pipeline() | Sequential():
31
+ for child in node.nodes:
32
+ for subpath in get_paths(child):
33
+ yield tuple(cur_path) + subpath
34
+ # Computes are leaves so should get a yield here.
35
+ case Compute():
36
+ yield tuple(cur_path)
37
+ # Not implemented so continue.
38
+ case _:
39
+ # TODO: Check this is correct
40
+ continue
41
+ raise NotImplementedError(
42
+ f"{type(node)} does not have type elucidation.\n"
43
+ f"---\n"
44
+ f"node={node}"
45
+ )
46
+
47
+
48
def get_leaves(mapping: Mapping, is_path):
    """
    Yield the Compute leaves reachable from *mapping*.

    When *is_path* is true, *mapping* is a linear path and its final
    element is its only leaf; otherwise Pipeline/Sequential nodes are
    recursed into and Compute nodes are yielded directly.
    """
    if is_path:
        yield mapping[-1]
        return
    for entry in mapping:
        if isinstance(entry, (Pipeline, Sequential)):
            for branch in entry.children:
                yield from get_leaves(branch, is_path)
        elif isinstance(entry, Compute):
            yield entry
58
+
59
+
60
def get_intermediate_tensors(workload: Workload):
    """
    Return the intermediate tensors of *workload*: tensors written by one
    einsum and read by at least one einsum in the workload.

    :param workload: The workload whose einsums are inspected.

    :returns: Set of intermediate tensor names.
    """
    # Hoist the einsum-name lookup out of the loops: the original
    # re-queried workload.einsum_id_to_name() for every reader check.
    # TODO(review): assumes einsum_id_to_name() is a pure getter.
    einsum_names = workload.einsum_id_to_name()
    result = set()
    for einsum in einsum_names:
        for tensor in workload.einsums[einsum].output_tensor_names:
            # A tensor is intermediate when any of its readers is an
            # einsum of this workload (replaces the add-then-break loop).
            if any(r in einsum_names for r in workload.reader_einsums(tensor)):
                result.add(tensor)
    return result
@@ -0,0 +1,4 @@
1
+ from .symbolic import (
2
+ SymbolicAnalysisOutput,
3
+ analyze_reuse_and_add_reservations_to_mapping,
4
+ )
@@ -0,0 +1 @@
1
+ from .des import IslReuseAnalysisOutput
@@ -0,0 +1,59 @@
1
+ """
2
+ TODO: Is this file still necessary? It is referenced elsewhere but is no longer
3
+ the format we are looking for.
4
+ """
5
+
6
+ from dataclasses import dataclass, field
7
+
8
+ import islpy as isl
9
+
10
+
11
@dataclass
class IslReuseAnalysisOutput:
    """
    Container for ISL-based reuse-analysis results.

    Each field maps an analysis key to a ``(dims, PwQPolynomial)`` pair
    (see ``deserialize_looptree_output``). All fields default to empty
    dicts; fanout, op_intensity, and op_occupancy are declared here but
    not filled by ``deserialize_looptree_output``.
    """

    # Operation counts per key.
    ops: dict = field(default_factory=dict)
    # Fill (data-movement-in) counts per key.
    fills: dict = field(default_factory=dict)
    # Buffer occupancy per key.
    occupancy: dict = field(default_factory=dict)
    # Occupancy attributed to ops — presumably; not populated here.
    op_occupancy: dict = field(default_factory=dict)
    # Reads served by a peer unit.
    reads_to_peer: dict = field(default_factory=dict)
    # Reads served by the parent level.
    reads_to_parent: dict = field(default_factory=dict)
    # Temporal step counts per key.
    temporal_steps: dict = field(default_factory=dict)
    # Spatial fanout — presumably; not populated here.
    fanout: dict = field(default_factory=dict)
    # Operational intensity — presumably; not populated here.
    op_intensity: dict = field(default_factory=dict)
22
+
23
+
24
def deserialize_looptree_output(
    looptree_output, isl_ctx: isl.Context  #: bindings.looptree.LooptreeResult,
) -> IslReuseAnalysisOutput:
    """
    Deserialize a looptree result into an IslReuseAnalysisOutput, parsing
    each serialized quasi-polynomial string with the given ISL context.

    :param looptree_output: Looptree result whose dict attributes map
        keys to ``(dims, pwqpolynomial_str)`` pairs.
    :param isl_ctx: ISL context used to parse the polynomial strings.

    :returns: Populated IslReuseAnalysisOutput. Note: op_occupancy,
        fanout, and op_intensity are left at their empty defaults, as in
        the original implementation.
    """

    # All six attributes were deserialized with the same copy-pasted dict
    # comprehension; factored into one helper applied per attribute.
    def _parse(mapping: dict) -> dict:
        return {
            k: (dims, isl.PwQPolynomial.read_from_str(isl_ctx, v))
            for k, (dims, v) in mapping.items()
        }

    output = IslReuseAnalysisOutput()
    for attr in (
        "ops",
        "fills",
        "occupancy",
        "reads_to_peer",
        "reads_to_parent",
        "temporal_steps",
    ):
        setattr(output, attr, _parse(getattr(looptree_output, attr)))

    return output