accelforge 0.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (258) hide show
  1. accelforge/__init__.py +21 -0
  2. accelforge/_accelerated_imports.py +16 -0
  3. accelforge/_deprecate/_simanneal/evalmapping.py +271 -0
  4. accelforge/_deprecate/_simanneal/mapspaceglobals.py +298 -0
  5. accelforge/_deprecate/_simanneal/simanneal.py +666 -0
  6. accelforge/_deprecate/_simanneal/tracking.py +105 -0
  7. accelforge/_deprecate/_simanneal/wrappers.py +218 -0
  8. accelforge/_deprecate/_simanneal2/__init__.py +7 -0
  9. accelforge/_deprecate/_simanneal2/simanneal.py +493 -0
  10. accelforge/_deprecate/_simanneal2/tracking.py +116 -0
  11. accelforge/_deprecate/compatibility_util.py +181 -0
  12. accelforge/_deprecate/layerdeduplication/__init__.py +2 -0
  13. accelforge/_deprecate/layerdeduplication/group_similar_einsums.py +160 -0
  14. accelforge/_deprecate/layerdeduplication/grouped_einsums.py +84 -0
  15. accelforge/_deprecate/mapping_filter_tags/__init__.py +2 -0
  16. accelforge/_deprecate/mapping_filter_tags/ffmt.py +212 -0
  17. accelforge/_deprecate/mapping_filter_tags/onesplit.py +24 -0
  18. accelforge/_deprecate/mapping_filter_tags/util.py +24 -0
  19. accelforge/_deprecate/tags.py +69 -0
  20. accelforge/_deprecate/viz/__init__.py +0 -0
  21. accelforge/_deprecate/viz/interactive.py +159 -0
  22. accelforge/_deprecate/viz/reservationtree.py +307 -0
  23. accelforge/_deprecate/viz/ski_slope.py +88 -0
  24. accelforge/_version.py +15 -0
  25. accelforge/examples.py +39 -0
  26. accelforge/frontend/__init__.py +10 -0
  27. accelforge/frontend/_binding.py +129 -0
  28. accelforge/frontend/_workload_isl/__init__.py +2 -0
  29. accelforge/frontend/_workload_isl/_isl.py +149 -0
  30. accelforge/frontend/_workload_isl/_symbolic.py +141 -0
  31. accelforge/frontend/arch copy.py +1544 -0
  32. accelforge/frontend/arch.py +1642 -0
  33. accelforge/frontend/config.py +63 -0
  34. accelforge/frontend/mapper/__init__.py +5 -0
  35. accelforge/frontend/mapper/ffm.py +126 -0
  36. accelforge/frontend/mapper/mapper.py +7 -0
  37. accelforge/frontend/mapper/metrics.py +30 -0
  38. accelforge/frontend/mapping/__init__.py +1 -0
  39. accelforge/frontend/mapping/mapping.py +1736 -0
  40. accelforge/frontend/model.py +14 -0
  41. accelforge/frontend/renames.py +150 -0
  42. accelforge/frontend/spec copy.py +230 -0
  43. accelforge/frontend/spec.py +301 -0
  44. accelforge/frontend/variables.py +12 -0
  45. accelforge/frontend/workload.py +952 -0
  46. accelforge/mapper/FFM/__init__.py +9 -0
  47. accelforge/mapper/FFM/_join_pmappings/__init__.py +0 -0
  48. accelforge/mapper/FFM/_join_pmappings/compatibility.py +653 -0
  49. accelforge/mapper/FFM/_join_pmappings/compress_pmappings.py +140 -0
  50. accelforge/mapper/FFM/_join_pmappings/join_pmappings.py +703 -0
  51. accelforge/mapper/FFM/_join_pmappings/pmapping_dataframe.py +901 -0
  52. accelforge/mapper/FFM/_join_pmappings/pmapping_group.py +337 -0
  53. accelforge/mapper/FFM/_make_pmappings/contraints/__init__.py +0 -0
  54. accelforge/mapper/FFM/_make_pmappings/contraints/constraints.py +360 -0
  55. accelforge/mapper/FFM/_make_pmappings/make_pmapping_templates/__init__.py +1 -0
  56. accelforge/mapper/FFM/_make_pmappings/make_pmapping_templates/make_loops.py +373 -0
  57. accelforge/mapper/FFM/_make_pmappings/make_pmapping_templates/make_pmapping_templates.py +463 -0
  58. accelforge/mapper/FFM/_make_pmappings/make_pmapping_templates/make_reservations.py +95 -0
  59. accelforge/mapper/FFM/_make_pmappings/make_pmapping_templates/make_storage_order.py +382 -0
  60. accelforge/mapper/FFM/_make_pmappings/make_pmapping_templates/make_storages.py +155 -0
  61. accelforge/mapper/FFM/_make_pmappings/make_pmappings.py +411 -0
  62. accelforge/mapper/FFM/_make_pmappings/make_pmappings_from_templates/__init__.py +1 -0
  63. accelforge/mapper/FFM/_make_pmappings/make_pmappings_from_templates/make_pmappings_from_templates.py +407 -0
  64. accelforge/mapper/FFM/_make_pmappings/make_pmappings_from_templates/make_tile_shapes.py +1681 -0
  65. accelforge/mapper/FFM/_make_pmappings/make_pmappings_from_templates/run_model.py +170 -0
  66. accelforge/mapper/FFM/_make_pmappings/make_pmappings_from_templates/symbol_relations.py +174 -0
  67. accelforge/mapper/FFM/_make_pmappings/pmapper_job.py +282 -0
  68. accelforge/mapper/FFM/_pareto_df/df_convention.py +273 -0
  69. accelforge/mapper/FFM/_pareto_df/pareto copy.py +836 -0
  70. accelforge/mapper/FFM/_pareto_df/pareto.py +508 -0
  71. accelforge/mapper/FFM/data.py +61 -0
  72. accelforge/mapper/FFM/main copy.py +236 -0
  73. accelforge/mapper/FFM/main.py +208 -0
  74. accelforge/mapper/FFM/mappings.py +510 -0
  75. accelforge/mapper/FFM/pmappings.py +310 -0
  76. accelforge/mapper/__init__.py +4 -0
  77. accelforge/mapper.py +0 -0
  78. accelforge/model/__init__.py +1 -0
  79. accelforge/model/_looptree/__init__.py +0 -0
  80. accelforge/model/_looptree/accesses.py +335 -0
  81. accelforge/model/_looptree/capacity/__init__.py +1 -0
  82. accelforge/model/_looptree/capacity/aggregators.py +36 -0
  83. accelforge/model/_looptree/capacity/capacity.py +47 -0
  84. accelforge/model/_looptree/energy.py +150 -0
  85. accelforge/model/_looptree/equivalent_ranks.py +29 -0
  86. accelforge/model/_looptree/latency/__init__.py +1 -0
  87. accelforge/model/_looptree/latency/latency.py +98 -0
  88. accelforge/model/_looptree/latency/memory.py +120 -0
  89. accelforge/model/_looptree/latency/processors.py +92 -0
  90. accelforge/model/_looptree/mapping_utilities.py +71 -0
  91. accelforge/model/_looptree/reuse/__init__.py +4 -0
  92. accelforge/model/_looptree/reuse/isl/__init__.py +1 -0
  93. accelforge/model/_looptree/reuse/isl/des.py +59 -0
  94. accelforge/model/_looptree/reuse/isl/isl_functions.py +374 -0
  95. accelforge/model/_looptree/reuse/isl/mapping_to_isl/__init__.py +4 -0
  96. accelforge/model/_looptree/reuse/isl/mapping_to_isl/analyze_mapping.py +297 -0
  97. accelforge/model/_looptree/reuse/isl/mapping_to_isl/skews_from_mapping.py +236 -0
  98. accelforge/model/_looptree/reuse/isl/mapping_to_isl/tiling.py +685 -0
  99. accelforge/model/_looptree/reuse/isl/mapping_to_isl/types.py +188 -0
  100. accelforge/model/_looptree/reuse/isl/spatial.py +260 -0
  101. accelforge/model/_looptree/reuse/isl/temporal.py +182 -0
  102. accelforge/model/_looptree/reuse/symbolic/__init__.py +1 -0
  103. accelforge/model/_looptree/reuse/symbolic/symbolic copy 2.py +1346 -0
  104. accelforge/model/_looptree/reuse/symbolic/symbolic copy.py +1408 -0
  105. accelforge/model/_looptree/reuse/symbolic/symbolic.py +1396 -0
  106. accelforge/model/_looptree/run.py +122 -0
  107. accelforge/model/_looptree/types.py +26 -0
  108. accelforge/model/_looptree/visualization/__init__.py +0 -0
  109. accelforge/model/_looptree/visualization/occupancy.py +11 -0
  110. accelforge/model/main.py +222 -0
  111. accelforge/plotting/__init__.py +2 -0
  112. accelforge/plotting/mappings.py +219 -0
  113. accelforge/plotting/specs.py +57 -0
  114. accelforge/util/__init__.py +4 -0
  115. accelforge/util/_base_analysis_types.py +24 -0
  116. accelforge/util/_basetypes.py +1089 -0
  117. accelforge/util/_frozenset.py +36 -0
  118. accelforge/util/_isl.py +29 -0
  119. accelforge/util/_itertools.py +14 -0
  120. accelforge/util/_mathfuncs.py +57 -0
  121. accelforge/util/_parse_expressions.py +339 -0
  122. accelforge/util/_picklecache.py +32 -0
  123. accelforge/util/_setexpressions.py +268 -0
  124. accelforge/util/_sympy/__init__.py +0 -0
  125. accelforge/util/_sympy/broadcast_max.py +18 -0
  126. accelforge/util/_visualization.py +112 -0
  127. accelforge/util/_yaml.py +579 -0
  128. accelforge/util/parallel.py +193 -0
  129. accelforge-0.0.1.dist-info/METADATA +64 -0
  130. accelforge-0.0.1.dist-info/RECORD +258 -0
  131. accelforge-0.0.1.dist-info/WHEEL +5 -0
  132. accelforge-0.0.1.dist-info/licenses/LICENSE +19 -0
  133. accelforge-0.0.1.dist-info/top_level.txt +5 -0
  134. docs/_build/html/_sources/fastfusion.frontend.mapper.rst.txt +37 -0
  135. docs/_build/html/_sources/fastfusion.frontend.rst.txt +70 -0
  136. docs/_build/html/_sources/fastfusion.frontend.workload.rst.txt +21 -0
  137. docs/_build/html/_sources/fastfusion.mapper.FFM.rst.txt +37 -0
  138. docs/_build/html/_sources/fastfusion.mapper.rst.txt +18 -0
  139. docs/_build/html/_sources/fastfusion.rst.txt +20 -0
  140. docs/_build/html/_sources/fastfusion.util.rst.txt +21 -0
  141. docs/_build/html/_sources/index.rst.txt +87 -0
  142. docs/_build/html/_sources/modules.rst.txt +7 -0
  143. docs/_build/html/_sources/notes/citation.rst.txt +45 -0
  144. docs/_build/html/_sources/notes/definitions.rst.txt +43 -0
  145. docs/_build/html/_sources/notes/faqs.rst.txt +39 -0
  146. docs/_build/html/_sources/notes/modeling/accelerator_energy_latency.rst.txt +72 -0
  147. docs/_build/html/_sources/notes/modeling/component_energy_area.rst.txt +96 -0
  148. docs/_build/html/_sources/notes/modeling/mapping.rst.txt +100 -0
  149. docs/_build/html/_sources/notes/modeling.rst.txt +33 -0
  150. docs/_build/html/_sources/notes/parsing/arithmetic_parsing.rst.txt +136 -0
  151. docs/_build/html/_sources/notes/parsing/setexpressions.rst.txt +63 -0
  152. docs/_build/html/_sources/notes/parsing/yaml_parsing.rst.txt +176 -0
  153. docs/_build/html/_sources/notes/quickstart_and_installation.rst.txt +9 -0
  154. docs/_build/html/_sources/notes/spec/architecture.rst.txt +133 -0
  155. docs/_build/html/_sources/notes/spec/mapping.rst.txt +12 -0
  156. docs/_build/html/_sources/notes/spec/workload.rst.txt +83 -0
  157. docs/_build/html/_sources/notes/spec.rst.txt +36 -0
  158. docs/source/_ext/include_attrs.py +213 -0
  159. docs/source/_ext/include_docstring.py +364 -0
  160. docs/source/_ext/include_functions.py +154 -0
  161. docs/source/_ext/include_notebook.py +131 -0
  162. docs/source/_ext/include_yaml.py +119 -0
  163. docs/source/_ext/inherited_attributes.py +222 -0
  164. docs/source/_ext/paths.py +4 -0
  165. docs/source/conf.py +79 -0
  166. examples/arches/compute_in_memory/_include.yaml +74 -0
  167. examples/arches/compute_in_memory/_include_functions.py +229 -0
  168. examples/arches/compute_in_memory/_load_spec.py +57 -0
  169. examples/arches/compute_in_memory/components/c2c_multiplier.py +181 -0
  170. examples/arches/compute_in_memory/components/dac_c2c_r2r.py +605 -0
  171. examples/arches/compute_in_memory/components/misc.py +195 -0
  172. examples/arches/compute_in_memory/components/util/bit_functions.py +51 -0
  173. examples/arches/compute_in_memory/components/zero_comparator.py +92 -0
  174. examples/arches/compute_in_memory/isaac.yaml +233 -0
  175. examples/arches/compute_in_memory/memory_cells/ecram_demo.yaml +63 -0
  176. examples/arches/compute_in_memory/memory_cells/rram_example.yaml +63 -0
  177. examples/arches/compute_in_memory/memory_cells/rram_isaac_isca_2016.yaml +64 -0
  178. examples/arches/compute_in_memory/memory_cells/rram_neurosim_default.yaml +63 -0
  179. examples/arches/compute_in_memory/memory_cells/rram_raella_isca_2023.yaml +70 -0
  180. examples/arches/compute_in_memory/memory_cells/rram_wan_nature_2022.yaml +63 -0
  181. examples/arches/compute_in_memory/memory_cells/sram_colonnade_jssc_2021.yaml +63 -0
  182. examples/arches/compute_in_memory/memory_cells/sram_example.yaml +63 -0
  183. examples/arches/compute_in_memory/memory_cells/sram_jia_jssc_2020.yaml +63 -0
  184. examples/arches/compute_in_memory/memory_cells/sram_sinangil_jssc_2021.yaml +63 -0
  185. examples/arches/compute_in_memory/memory_cells/sram_wang_vlsi_2022.yaml +63 -0
  186. examples/arches/compute_in_memory/wang_vlsi_2022.yaml +289 -0
  187. examples/arches/eyeriss.yaml +68 -0
  188. examples/arches/fanout_variations/at_glb.yaml +31 -0
  189. examples/arches/fanout_variations/at_glb_with_fanout_node.yaml +34 -0
  190. examples/arches/fanout_variations/at_mac.yaml +31 -0
  191. examples/arches/fanout_variations/at_mac_with_constraints.yaml +38 -0
  192. examples/arches/fanout_variations/at_mac_with_fanout_node.yaml +34 -0
  193. examples/arches/nvdla.yaml +47 -0
  194. examples/arches/simple.yaml +28 -0
  195. examples/arches/tpu_v4i.yaml +67 -0
  196. examples/mappings/unfused_matmuls_to_simple.yaml +33 -0
  197. examples/misc/component_annotated.yaml +33 -0
  198. examples/workloads/gpt3_6.7B.yaml +124 -0
  199. examples/workloads/matmuls.yaml +20 -0
  200. examples/workloads/mobilenet_28.yaml +81 -0
  201. examples/workloads/mobilenet_various_separate.yaml +106 -0
  202. examples/workloads/three_matmuls_annotated.yaml +59 -0
  203. notebooks/.ipynb_checkpoints/fastfusion_arch_study_michael-checkpoint.ipynb +359 -0
  204. notebooks/compute_in_memory/_scripts.py +339 -0
  205. notebooks/compute_in_memory/isaac.guide.ipynb +270 -0
  206. notebooks/compute_in_memory/wang_vlsi_2022.ipynb +602 -0
  207. notebooks/paths.py +4 -0
  208. notebooks/tutorials/.ipynb_checkpoints/1_FFM-checkpoint.ipynb +3110 -0
  209. notebooks/tutorials/FFM.ipynb +3498 -0
  210. notebooks/tutorials/_include.py +48 -0
  211. notebooks/tutorials/component_energy_area.ipynb +363 -0
  212. tests/Q_mapping.yaml +38 -0
  213. tests/__init__.py +0 -0
  214. tests/conv.mapping.yaml +27 -0
  215. tests/conv.workload.yaml +13 -0
  216. tests/conv_sym.mapping.yaml +43 -0
  217. tests/copy.mapping.yaml +35 -0
  218. tests/copy.workload.yaml +15 -0
  219. tests/distribuffers/__init__.py +0 -0
  220. tests/distribuffers/multicast/test_cases.yaml +482 -0
  221. tests/distribuffers/spec/binding/valid_bindings.yaml +97 -0
  222. tests/distribuffers/spec/distributed.yaml +100 -0
  223. tests/distribuffers/spec/logical_arch.yaml +32 -0
  224. tests/distribuffers/spec/physical_arch.yaml +69 -0
  225. tests/distribuffers/test_binding.py +48 -0
  226. tests/frontend/__init__.py +0 -0
  227. tests/frontend/test_mapping_viz.py +52 -0
  228. tests/mapper/__init__.py +0 -0
  229. tests/mapper/configs/conv1d/conv1d.mapping.yaml +31 -0
  230. tests/mapper/configs/conv1d/conv1d.workload.yaml +11 -0
  231. tests/mapper/configs/two_conv1d/two_conv1d.expected.yaml +38 -0
  232. tests/mapper/configs/two_conv1d/two_conv1d.mapping.yaml +54 -0
  233. tests/mapper/configs/two_conv1d/two_conv1d.workload.yaml +19 -0
  234. tests/mapper/test_mapping_to_isl.py +90 -0
  235. tests/mapper/test_spatial_reuse_analysis.py +67 -0
  236. tests/mapper/test_temporal_reuse_analysis.py +56 -0
  237. tests/mapper/util.py +58 -0
  238. tests/matmul.mapping.yaml +29 -0
  239. tests/matmul.workload.yaml +12 -0
  240. tests/matmul_spatial.mapping.yaml +44 -0
  241. tests/mha.renames.yaml +65 -0
  242. tests/mha.workload.yaml +67 -0
  243. tests/mha.yaml +59 -0
  244. tests/mha_full.workload.yaml +67 -0
  245. tests/mobilenet.workload.yaml +35 -0
  246. tests/mobilenet_long.workload.yaml +64 -0
  247. tests/pmappingcache.py +24 -0
  248. tests/processing_stage.arch.yaml +40 -0
  249. tests/snowcat.arch.yaml +36 -0
  250. tests/test_ffm_join_pmappings.py +106 -0
  251. tests/test_ffm_make_pmappings.py +82 -0
  252. tests/test_ffm_make_tile_shapes.py +49 -0
  253. tests/test_mapper.py +100 -0
  254. tests/test_model.py +37 -0
  255. tests/test_plotting.py +72 -0
  256. tests/test_processing_stage.py +46 -0
  257. tests/test_symbolic_model.py +248 -0
  258. tests/test_workload.py +141 -0
@@ -0,0 +1,33 @@
1
+ mapping:
2
+ nodes:
3
+ {% for i in range(N_EINSUMS+1) %}
4
+ - !Storage
5
+ tensors: [T{{i}}]
6
+ component: MainMemory
7
+ {% endfor %}
8
+ {% for i in range(N_EINSUMS) %}
9
+ - !Storage
10
+ tensors: [W{{i}}]
11
+ component: MainMemory
12
+ {% endfor %}
13
+ - !Temporal
14
+ rank_variable: m
15
+ tile_shape: 1
16
+ - !Sequential
17
+ nodes:
18
+ {% for i in range(N_EINSUMS) %}
19
+ - !Nested
20
+ nodes:
21
+ - !Storage
22
+ tensors: [T{{i}}, W{{i}}, T{{i+1}}]
23
+ component: GlobalBuffer
24
+ - !Temporal
25
+ rank_variable: n{{i}}
26
+ tile_shape: 1
27
+ - !Temporal
28
+ rank_variable: n{{i+1}}
29
+ tile_shape: 1
30
+ - !Compute
31
+ einsum: Matmul{{i}}
32
+ component: MAC
33
+ {% endfor %}
@@ -0,0 +1,33 @@
1
+ # The YAML tag at the beginning of the component tells the parser what type of component
2
+ # it is.
3
+ !Memory
4
+
5
+ name: GlobalBuffer
6
+
7
+ # This component_class invokes the hwcomponents_library.SmartBufferSRAM model. The
8
+ # component is a SRAM buffer with an address register that is updated on every
9
+ # access to queue up the next data.
10
+ component_class: SmartBufferSRAM
11
+
12
+ # Fields can be parsed as expressions. The size field is given in bits, and we write
13
+ # here that it is 512 × 32 bits.
14
+ size: 512 * 32
15
+
16
+ # Sometimes, hwcomponents models require additional attributes that are not part of the
17
+ # accelforge spec. These can be passed to the component models through the
18
+ # extra_attributes_for_component_model field.
19
+ extra_attributes_for_component_model:
20
+ depth: 512
21
+ width: size // depth
22
+ n_banks: 32
23
+ tech_node: 65e-9
24
+
25
+ # Actions that the hwcomponents_library model must support. All applicable action
26
+ # attributes are passed to the model's appropriate method (in this case, `read` or `write`).
27
+ # The `bits_per_action` argument is special because it determines how many bits are
28
+ # transferred by each read or write. Additionally, to pass extra attributes to the
29
+ # component model as arguments, actions also have an extra_attributes_for_component_model
30
+ # field.
31
+ actions:
32
+ - {name: read, bits_per_action: 64}
33
+ - {name: write, bits_per_action: 64}
@@ -0,0 +1,124 @@
1
+ # Each tensor is shaped by a set of ranks, denoted by capital letters
2
+ # For example: Q is shaped by (B, M, H, E)
3
+ # We'll use lower-case letters to index into the ranks
4
+ # For example: Q[b, m, h, e] is the tensor Q at index (b, m, h, e)
5
+
6
+ # When making a projection list, it's equivalent to the Einsum subscript notation, so:
7
+ # Q projection [b, m, h, e] means that b indexes into B, m indexes into M...
8
+ # When making a projection dict, it's equivalent to the Einsum subscript/superscript notation, so:
9
+ # K projection { B: b, M: p, H: h, E: e } means that b indexes into B, p indexes into M...
10
+
11
+ # Renames take a tensor name and turn it into a canonical name that we can use in
12
+ # architecture constraints. For example, we want to use the words "input", "weight", and
13
+ # "output" to refer to the tensors of an Einsum, but the Einsum QK has no clear "weight"
14
+ # or "input" because both Q and K are inputs. So we rename K to be weight.
15
+
16
+
17
+ workload:
18
+ rank_sizes:
19
+ {% set BATCH_SIZE = BATCH_SIZE | default(1) %}
20
+ {% set N_TOKENS = N_TOKENS | default(8192) %}
21
+ B: {{BATCH_SIZE}}
22
+ P: {{N_TOKENS}}
23
+ M: {{N_TOKENS}}
24
+ H: 32
25
+ E: 128
26
+ F: 128
27
+ D: 4096 # = e * h
28
+ C: 16384
29
+ J: 4096
30
+ G: 4096
31
+
32
+ bits_per_value: {All: 8}
33
+
34
+ einsums:
35
+ - name: I
36
+ # Copy operation means that we move the input tensor from one place to another
37
+ # without doing computation. This lets us copy the input tensor onto the accelerator
38
+ # once and then use it in the Q, K, and V operations.
39
+ is_copy_operation: True # no longer needed with `map` and `reduce` support
40
+ tensor_accesses:
41
+ - {name: I_in, projection: [b, m, d]}
42
+ - {name: I, projection: [b, m, d], output: True}
43
+
44
+ # operations:
45
+ # map: {operation} if output = f(inputs), None if output = inputs
46
+ # reduce: {operation} if output = reduce(partial_outputA, partial_outputB, ...), None if not supported
47
+ # populate: {operation} if initial_output = populate, None if initial_output = first-generated partial output
48
+
49
+ # operations:
50
+ # map: None # Alternatives: "mul", "relu", etc.
51
+ # reduce: None # Alternatives: "max", etc. Note: None means "give me whatever is the last value (mapping dependent)"
52
+ # # reduce: None with strict checking: if there is a reduce, an error is thrown.
53
+ # populate: None
54
+ renames: {weight: Nothing, input: Inputs, output: Outputs}
55
+
56
+ - name: V
57
+ tensor_accesses:
58
+ - {name: I, projection: [b, m, d]}
59
+ - {name: WV, projection: [h, e, d], persistent: True}
60
+ - {name: V, projection: [b, m, h, e], output: True}
61
+
62
+ - name: K
63
+ tensor_accesses:
64
+ - {name: I, projection: [b, m, d]}
65
+ - {name: WK, projection: [h, e, d], persistent: True}
66
+ - {name: K, projection: [b, m, h, e], output: True}
67
+
68
+ - name: Q
69
+ tensor_accesses:
70
+ - {name: I, projection: [b, m, d]}
71
+ - {name: WQ, projection: [h, e, d], persistent: True}
72
+ - {name: Q, projection: [b, m, h, e], output: True}
73
+
74
+ - name: QK
75
+ tensor_accesses:
76
+ - {name: Q, projection: [b, m, h, e]}
77
+ - {name: K, projection: { B: b, M: p, H: h, E: e }}
78
+ - {name: QK, projection: [b, m, p, h], output: True}
79
+ renames: {weight: K, input: Q, output: QK}
80
+
81
+ - name: QK_softmax
82
+ tensor_accesses:
83
+ - {name: QK, projection: [b, m, p, h]}
84
+ - {name: QK_softmax, projection: [b, m, p, h], output: True}
85
+ renames: {weight: Nothing}
86
+
87
+ - name: AV
88
+ tensor_accesses:
89
+ - {name: QK_softmax, projection: [b, m, p, h]}
90
+ - {name: V, projection: { B: b, M: p, H: h, E: f}}
91
+ - {name: AV, projection: [b, m, h, f], output: True}
92
+ renames: {weight: V, input: QK_softmax}
93
+
94
+ - name: Z
95
+ tensor_accesses:
96
+ - {name: AV, projection: [b, m, h, f]}
97
+ - {name: WZ, projection: [h, f, g], persistent: True}
98
+ - {name: Z, projection: [b, m, g], output: True}
99
+
100
+ - name: FFA
101
+ tensor_accesses:
102
+ - {name: Z, projection: [b, m, g]}
103
+ - {name: WFFA, projection: [g, c], persistent: True}
104
+ - {name: FFA, projection: [b, m, c], output: True}
105
+
106
+ - name: FFB
107
+ tensor_accesses:
108
+ - {name: FFA, projection: [b, m, c]}
109
+ - {name: WFFB, projection: [c, j], persistent: True}
110
+ - {name: FFB, projection: [b, m, j], output: True}
111
+
112
+ renames:
113
+ einsums:
114
+ - name: default
115
+ tensor_accesses:
116
+ - name: input
117
+ source: Inputs & Intermediates
118
+ expected_count: 1
119
+ - name: output
120
+ source: Outputs
121
+ expected_count: 1
122
+ - name: weight
123
+ source: ~(input | output)
124
+ expected_count: 1
@@ -0,0 +1,20 @@
1
+ workload:
2
+ iteration_space_shape:
3
+ {% set M = M | default(128) %}
4
+ {% set KN = KN | default(128) %}
5
+ m: 0 <= m < {{M}}
6
+ {% for i in range(N_EINSUMS+1) %}
7
+ n{{i}}: 0 <= n{{i}} < {{KN}}
8
+ {% endfor %}
9
+
10
+ bits_per_value: {All: 8}
11
+
12
+ einsums:
13
+ {% for i in range(N_EINSUMS) %}
14
+ - name: Matmul{{i}}
15
+ tensor_accesses:
16
+ - {name: T{{i}}, projection: [m, n{{i}}]}
17
+ - {name: W{{i}}, projection: [n{{i}}, n{{i+1}}]}
18
+ - {name: T{{i+1}}, projection: [m, n{{i+1}}], output: True}
19
+ renames: {weight: W{{i}}, input: T{{i}}, output: T{{i+1}}}
20
+ {% endfor %}
@@ -0,0 +1,81 @@
1
+ workload:
2
+ rank_sizes:
3
+ {% set BATCH_SIZE = BATCH_SIZE | default(1) %}
4
+ B: {{BATCH_SIZE}}
5
+ P0: 28
6
+ Q0: 28
7
+ N0: 24
8
+ T0: 192
9
+ N1: 24
10
+ P1: 28
11
+ Q1: 28
12
+ P2: 28
13
+ Q2: 28
14
+ T1: 192
15
+ N2: 24
16
+
17
+ bits_per_value: {All: 8}
18
+
19
+ einsums:
20
+ - name: PiecewiseA0
21
+ tensor_accesses:
22
+ - {name: T0, projection: [p0, q0, n0, b]}
23
+ - {name: WA0, projection: [n0, t0]}
24
+ - {name: TA0, projection: [p0, q0, t0, b], output: True}
25
+ renames: {input: T0, output: TA0, weight: WA0}
26
+
27
+ - name: Depthwise0
28
+ tensor_accesses:
29
+ - name: TA0
30
+ projection:
31
+ P0: p1 + r0
32
+ Q0: q1 + s0
33
+ T0: t0
34
+ B: b
35
+ - {name: WAB0, projection: [r0, s0, t0]}
36
+ - {name: TB0, projection: [p1, q1, t0, b], output: True}
37
+ shape: [0 <= r0 < 3, 0 <= s0 < 3]
38
+
39
+ - name: PiecewiseB0
40
+ tensor_accesses:
41
+ - {name: TB0, projection: [p1, q1, t0, b]}
42
+ - {name: WB0, projection: [t0, n1]}
43
+ - {name: T1, projection: [p1, q1, n1, b], output: True}
44
+
45
+ - name: PiecewiseA1
46
+ tensor_accesses:
47
+ - {name: T1, projection: [p1, q1, n1, b]}
48
+ - {name: WA1, projection: [n1, t1]}
49
+ - {name: TA1, projection: [p1, q1, t1, b], output: True}
50
+
51
+ - name: Depthwise1
52
+ tensor_accesses:
53
+ - name: TA1
54
+ projection:
55
+ P1: p2 + r1
56
+ Q1: q2 + s1
57
+ T1: t1
58
+ B: b
59
+ - {name: WAB1, projection: [r1, s1, t1]}
60
+ - {name: TB1, projection: [p2, q2, t1, b], output: True}
61
+ shape: [1 <= r1 < 3, 1 <= s1 < 3] # NOTE(review): lower bound 1 differs from Depthwise0's `0 <= r0` — confirm this is intentional and not a typo
62
+
63
+ - name: PiecewiseB2 # NOTE(review): naming pattern (A1/Depthwise1) suggests "PiecewiseB1" — confirm before renaming, since mappings may reference this name
64
+ tensor_accesses:
65
+ - {name: TB1, projection: [p2, q2, t1, b]}
66
+ - {name: WB1, projection: [t1, n2]}
67
+ - {name: T2, projection: [p2, q2, n2, b], output: True}
68
+
69
+ renames:
70
+ einsums:
71
+ - name: default
72
+ tensor_accesses:
73
+ - name: input
74
+ source: Inputs & Intermediates
75
+ expected_count: 1
76
+ - name: output
77
+ source: Outputs
78
+ expected_count: 1
79
+ - name: weight
80
+ source: ~(input | output)
81
+ expected_count: 1
@@ -0,0 +1,106 @@
1
+ workload:
2
+ version: "0.5"
3
+ shape:
4
+ {% set BATCH_SIZE = BATCH_SIZE | default(1) %}
5
+ b: 0 <= b < {{BATCH_SIZE}}
6
+ pp0: 0 <= pp0 < 116
7
+ qp0: 0 <= qp0 < 116
8
+ cp0: 0 <= cp0 < 16
9
+ mp0: 0 <= mp0 < 16
10
+ pd0: 0 <= pd0 < 116
11
+ qd0: 0 <= qd0 < 116
12
+ cd0: 0 <= cd0 < 16
13
+ rd0: 0 <= rd0 < 3
14
+ sd0: 0 <= sd0 < 3
15
+
16
+ pp1: 0 <= pp1 < 56
17
+ qp1: 0 <= qp1 < 56
18
+ cp1: 0 <= cp1 < 24
19
+ mp1: 0 <= mp1 < 72
20
+ pd1: 0 <= pd1 < 56
21
+ qd1: 0 <= qd1 < 56
22
+ cd1: 0 <= cd1 < 72
23
+ rd1: 0 <= rd1 < 5
24
+ sd1: 0 <= sd1 < 5
25
+
26
+ pp2: 0 <= pp2 < 28
27
+ qp2: 0 <= qp2 < 28
28
+ cp2: 0 <= cp2 < 40
29
+ mp2: 0 <= mp2 < 120
30
+ pd2: 0 <= pd2 < 28
31
+ qd2: 0 <= qd2 < 28
32
+ cd2: 0 <= cd2 < 120
33
+ rd2: 0 <= rd2 < 3
34
+ sd2: 0 <= sd2 < 3
35
+
36
+ einsums:
37
+ - name: P0
38
+ tensor_accesses:
39
+ - {name: IP0, projection: [pp0, qp0, cp0, b]}
40
+ - {name: WP0, projection: [cp0, mp0]}
41
+ - {name: OP0, projection: [pp0, qp0, mp0, b], output: True}
42
+ renames: {input: IP0, output: OP0, weight: WP0}
43
+
44
+ - name: D0
45
+ tensor_accesses:
46
+ - name: ID0
47
+ projection:
48
+ PD0: pd0 + rd0
49
+ QD0: qd0 + sd0
50
+ CD0: cd0
51
+ B: b
52
+ - {name: WD0, projection: [rd0, sd0, cd0]}
53
+ - {name: OD0, projection: [pd0, qd0, cd0, b], output: True}
54
+ renames: {input: ID0, output: OD0, weight: WD0}
55
+
56
+ - name: P1
57
+ tensor_accesses:
58
+ - {name: IP1, projection: [pp1, qp1, cp1, b]}
59
+ - {name: WP1, projection: [cp1, mp1]}
60
+ - {name: OP1, projection: [pp1, qp1, mp1, b], output: True}
61
+ renames: {input: IP1, output: OP1, weight: WP1}
62
+
63
+ - name: D1
64
+ tensor_accesses:
65
+ - name: ID1
66
+ projection:
67
+ PD1: pd1 + rd1
68
+ QD1: qd1 + sd1
69
+ CD1: cd1
70
+ B: b
71
+ - {name: WD1, projection: [rd1, sd1, cd1]}
72
+ - {name: OD1, projection: [pd1, qd1, cd1, b], output: True}
73
+ renames: {input: ID1, output: OD1, weight: WD1}
74
+
75
+ - name: P2
76
+ tensor_accesses:
77
+ - {name: IP2, projection: [pp2, qp2, cp2, b]}
78
+ - {name: WP2, projection: [cp2, mp2]}
79
+ - {name: OP2, projection: [pp2, qp2, mp2, b], output: True}
80
+ renames: {input: IP2, output: OP2, weight: WP2}
81
+
82
+ - name: D2
83
+ tensor_accesses:
84
+ - name: ID2
85
+ projection:
86
+ PD2: pd2 + rd2
87
+ QD2: qd2 + sd2
88
+ CD2: cd2
89
+ B: b
90
+ - {name: WD2, projection: [rd2, sd2, cd2]}
91
+ - {name: OD2, projection: [pd2, qd2, cd2, b], output: True}
92
+ renames: {input: ID2, output: OD2, weight: WD2}
93
+
94
+ renames:
95
+ einsums:
96
+ - name: default
97
+ tensor_accesses:
98
+ - name: input
99
+ source: Inputs() & Intermediates()
100
+ expected_count: 1
101
+ - name: output
102
+ source: Outputs()
103
+ expected_count: 1
104
+ - name: weight
105
+ source: ~(input | output)
106
+ expected_count: 1
@@ -0,0 +1,59 @@
1
+ workload:
2
+ # These rank sizes define the shapes of the tensors in the Einsum. Shapes are assumed
3
+ # to go from [0, size-1]. Indexes into a rank are omitted if out of range.
4
+ rank_sizes:
5
+ M: 128
6
+ N0: 128
7
+ N1: 128
8
+ N2: 128
9
+ N3: 128
10
+
11
+ # Alternatively, we can constrain each of the rank variables to be within a range. The
12
+ # values in this dictionary are ISL expressions, and the constraints apply to all
13
+ # Einsums that use these rank variables.
14
+ iteration_space_shape:
15
+ m: 0 <= m < 128
16
+ n0: 0 <= n0 < 128
17
+ n1: 0 <= n1 < 128
18
+ n2: 0 <= n2 < 128
19
+ n3: 0 <= n3 < 128
20
+
21
+ # Describe the number of bits of each value of each tensor. This is a dictionary of
22
+ # set expressions to bits per value for the tensors given by those expressions. They
23
+ # can be overridden by the bits_per_value attribute of any tensor access.
24
+ bits_per_value: {All: 8}
25
+
26
+ # The Einsums in the workload.
27
+ einsums:
28
+ - name: Matmul1
29
+ tensor_accesses:
30
+ - {name: T0, projection: [m, n0]}
31
+ - {name: W0, projection: [n0, n1]}
32
+ - {name: T1, projection: [m, n1], output: True}
33
+ renames: {input: T0}
34
+
35
+ - name: Matmul2
36
+ tensor_accesses:
37
+ - {name: T1, projection: [m, n1]}
38
+ - {name: W1, projection: [n1, n2]}
39
+ - {name: T2, projection: [m, n2], output: True}
40
+
41
+ - name: Matmul3
42
+ tensor_accesses:
43
+ - {name: T2, projection: [m, n2]}
44
+ - {name: W2, projection: [n2, n3]}
45
+ - {name: T3, projection: [m, n3], output: True}
46
+
47
+ renames:
48
+ einsums:
49
+ - name: default
50
+ tensor_accesses:
51
+ - name: input
52
+ source: Inputs & Intermediates
53
+ expected_count: 1
54
+ - name: output
55
+ source: Outputs
56
+ expected_count: 1
57
+ - name: weight
58
+ source: ~(input | output)
59
+ expected_count: 1