accelforge 0.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (258) hide show
  1. accelforge/__init__.py +21 -0
  2. accelforge/_accelerated_imports.py +16 -0
  3. accelforge/_deprecate/_simanneal/evalmapping.py +271 -0
  4. accelforge/_deprecate/_simanneal/mapspaceglobals.py +298 -0
  5. accelforge/_deprecate/_simanneal/simanneal.py +666 -0
  6. accelforge/_deprecate/_simanneal/tracking.py +105 -0
  7. accelforge/_deprecate/_simanneal/wrappers.py +218 -0
  8. accelforge/_deprecate/_simanneal2/__init__.py +7 -0
  9. accelforge/_deprecate/_simanneal2/simanneal.py +493 -0
  10. accelforge/_deprecate/_simanneal2/tracking.py +116 -0
  11. accelforge/_deprecate/compatibility_util.py +181 -0
  12. accelforge/_deprecate/layerdeduplication/__init__.py +2 -0
  13. accelforge/_deprecate/layerdeduplication/group_similar_einsums.py +160 -0
  14. accelforge/_deprecate/layerdeduplication/grouped_einsums.py +84 -0
  15. accelforge/_deprecate/mapping_filter_tags/__init__.py +2 -0
  16. accelforge/_deprecate/mapping_filter_tags/ffmt.py +212 -0
  17. accelforge/_deprecate/mapping_filter_tags/onesplit.py +24 -0
  18. accelforge/_deprecate/mapping_filter_tags/util.py +24 -0
  19. accelforge/_deprecate/tags.py +69 -0
  20. accelforge/_deprecate/viz/__init__.py +0 -0
  21. accelforge/_deprecate/viz/interactive.py +159 -0
  22. accelforge/_deprecate/viz/reservationtree.py +307 -0
  23. accelforge/_deprecate/viz/ski_slope.py +88 -0
  24. accelforge/_version.py +15 -0
  25. accelforge/examples.py +39 -0
  26. accelforge/frontend/__init__.py +10 -0
  27. accelforge/frontend/_binding.py +129 -0
  28. accelforge/frontend/_workload_isl/__init__.py +2 -0
  29. accelforge/frontend/_workload_isl/_isl.py +149 -0
  30. accelforge/frontend/_workload_isl/_symbolic.py +141 -0
  31. accelforge/frontend/arch copy.py +1544 -0
  32. accelforge/frontend/arch.py +1642 -0
  33. accelforge/frontend/config.py +63 -0
  34. accelforge/frontend/mapper/__init__.py +5 -0
  35. accelforge/frontend/mapper/ffm.py +126 -0
  36. accelforge/frontend/mapper/mapper.py +7 -0
  37. accelforge/frontend/mapper/metrics.py +30 -0
  38. accelforge/frontend/mapping/__init__.py +1 -0
  39. accelforge/frontend/mapping/mapping.py +1736 -0
  40. accelforge/frontend/model.py +14 -0
  41. accelforge/frontend/renames.py +150 -0
  42. accelforge/frontend/spec copy.py +230 -0
  43. accelforge/frontend/spec.py +301 -0
  44. accelforge/frontend/variables.py +12 -0
  45. accelforge/frontend/workload.py +952 -0
  46. accelforge/mapper/FFM/__init__.py +9 -0
  47. accelforge/mapper/FFM/_join_pmappings/__init__.py +0 -0
  48. accelforge/mapper/FFM/_join_pmappings/compatibility.py +653 -0
  49. accelforge/mapper/FFM/_join_pmappings/compress_pmappings.py +140 -0
  50. accelforge/mapper/FFM/_join_pmappings/join_pmappings.py +703 -0
  51. accelforge/mapper/FFM/_join_pmappings/pmapping_dataframe.py +901 -0
  52. accelforge/mapper/FFM/_join_pmappings/pmapping_group.py +337 -0
  53. accelforge/mapper/FFM/_make_pmappings/contraints/__init__.py +0 -0
  54. accelforge/mapper/FFM/_make_pmappings/contraints/constraints.py +360 -0
  55. accelforge/mapper/FFM/_make_pmappings/make_pmapping_templates/__init__.py +1 -0
  56. accelforge/mapper/FFM/_make_pmappings/make_pmapping_templates/make_loops.py +373 -0
  57. accelforge/mapper/FFM/_make_pmappings/make_pmapping_templates/make_pmapping_templates.py +463 -0
  58. accelforge/mapper/FFM/_make_pmappings/make_pmapping_templates/make_reservations.py +95 -0
  59. accelforge/mapper/FFM/_make_pmappings/make_pmapping_templates/make_storage_order.py +382 -0
  60. accelforge/mapper/FFM/_make_pmappings/make_pmapping_templates/make_storages.py +155 -0
  61. accelforge/mapper/FFM/_make_pmappings/make_pmappings.py +411 -0
  62. accelforge/mapper/FFM/_make_pmappings/make_pmappings_from_templates/__init__.py +1 -0
  63. accelforge/mapper/FFM/_make_pmappings/make_pmappings_from_templates/make_pmappings_from_templates.py +407 -0
  64. accelforge/mapper/FFM/_make_pmappings/make_pmappings_from_templates/make_tile_shapes.py +1681 -0
  65. accelforge/mapper/FFM/_make_pmappings/make_pmappings_from_templates/run_model.py +170 -0
  66. accelforge/mapper/FFM/_make_pmappings/make_pmappings_from_templates/symbol_relations.py +174 -0
  67. accelforge/mapper/FFM/_make_pmappings/pmapper_job.py +282 -0
  68. accelforge/mapper/FFM/_pareto_df/df_convention.py +273 -0
  69. accelforge/mapper/FFM/_pareto_df/pareto copy.py +836 -0
  70. accelforge/mapper/FFM/_pareto_df/pareto.py +508 -0
  71. accelforge/mapper/FFM/data.py +61 -0
  72. accelforge/mapper/FFM/main copy.py +236 -0
  73. accelforge/mapper/FFM/main.py +208 -0
  74. accelforge/mapper/FFM/mappings.py +510 -0
  75. accelforge/mapper/FFM/pmappings.py +310 -0
  76. accelforge/mapper/__init__.py +4 -0
  77. accelforge/mapper.py +0 -0
  78. accelforge/model/__init__.py +1 -0
  79. accelforge/model/_looptree/__init__.py +0 -0
  80. accelforge/model/_looptree/accesses.py +335 -0
  81. accelforge/model/_looptree/capacity/__init__.py +1 -0
  82. accelforge/model/_looptree/capacity/aggregators.py +36 -0
  83. accelforge/model/_looptree/capacity/capacity.py +47 -0
  84. accelforge/model/_looptree/energy.py +150 -0
  85. accelforge/model/_looptree/equivalent_ranks.py +29 -0
  86. accelforge/model/_looptree/latency/__init__.py +1 -0
  87. accelforge/model/_looptree/latency/latency.py +98 -0
  88. accelforge/model/_looptree/latency/memory.py +120 -0
  89. accelforge/model/_looptree/latency/processors.py +92 -0
  90. accelforge/model/_looptree/mapping_utilities.py +71 -0
  91. accelforge/model/_looptree/reuse/__init__.py +4 -0
  92. accelforge/model/_looptree/reuse/isl/__init__.py +1 -0
  93. accelforge/model/_looptree/reuse/isl/des.py +59 -0
  94. accelforge/model/_looptree/reuse/isl/isl_functions.py +374 -0
  95. accelforge/model/_looptree/reuse/isl/mapping_to_isl/__init__.py +4 -0
  96. accelforge/model/_looptree/reuse/isl/mapping_to_isl/analyze_mapping.py +297 -0
  97. accelforge/model/_looptree/reuse/isl/mapping_to_isl/skews_from_mapping.py +236 -0
  98. accelforge/model/_looptree/reuse/isl/mapping_to_isl/tiling.py +685 -0
  99. accelforge/model/_looptree/reuse/isl/mapping_to_isl/types.py +188 -0
  100. accelforge/model/_looptree/reuse/isl/spatial.py +260 -0
  101. accelforge/model/_looptree/reuse/isl/temporal.py +182 -0
  102. accelforge/model/_looptree/reuse/symbolic/__init__.py +1 -0
  103. accelforge/model/_looptree/reuse/symbolic/symbolic copy 2.py +1346 -0
  104. accelforge/model/_looptree/reuse/symbolic/symbolic copy.py +1408 -0
  105. accelforge/model/_looptree/reuse/symbolic/symbolic.py +1396 -0
  106. accelforge/model/_looptree/run.py +122 -0
  107. accelforge/model/_looptree/types.py +26 -0
  108. accelforge/model/_looptree/visualization/__init__.py +0 -0
  109. accelforge/model/_looptree/visualization/occupancy.py +11 -0
  110. accelforge/model/main.py +222 -0
  111. accelforge/plotting/__init__.py +2 -0
  112. accelforge/plotting/mappings.py +219 -0
  113. accelforge/plotting/specs.py +57 -0
  114. accelforge/util/__init__.py +4 -0
  115. accelforge/util/_base_analysis_types.py +24 -0
  116. accelforge/util/_basetypes.py +1089 -0
  117. accelforge/util/_frozenset.py +36 -0
  118. accelforge/util/_isl.py +29 -0
  119. accelforge/util/_itertools.py +14 -0
  120. accelforge/util/_mathfuncs.py +57 -0
  121. accelforge/util/_parse_expressions.py +339 -0
  122. accelforge/util/_picklecache.py +32 -0
  123. accelforge/util/_setexpressions.py +268 -0
  124. accelforge/util/_sympy/__init__.py +0 -0
  125. accelforge/util/_sympy/broadcast_max.py +18 -0
  126. accelforge/util/_visualization.py +112 -0
  127. accelforge/util/_yaml.py +579 -0
  128. accelforge/util/parallel.py +193 -0
  129. accelforge-0.0.1.dist-info/METADATA +64 -0
  130. accelforge-0.0.1.dist-info/RECORD +258 -0
  131. accelforge-0.0.1.dist-info/WHEEL +5 -0
  132. accelforge-0.0.1.dist-info/licenses/LICENSE +19 -0
  133. accelforge-0.0.1.dist-info/top_level.txt +5 -0
  134. docs/_build/html/_sources/fastfusion.frontend.mapper.rst.txt +37 -0
  135. docs/_build/html/_sources/fastfusion.frontend.rst.txt +70 -0
  136. docs/_build/html/_sources/fastfusion.frontend.workload.rst.txt +21 -0
  137. docs/_build/html/_sources/fastfusion.mapper.FFM.rst.txt +37 -0
  138. docs/_build/html/_sources/fastfusion.mapper.rst.txt +18 -0
  139. docs/_build/html/_sources/fastfusion.rst.txt +20 -0
  140. docs/_build/html/_sources/fastfusion.util.rst.txt +21 -0
  141. docs/_build/html/_sources/index.rst.txt +87 -0
  142. docs/_build/html/_sources/modules.rst.txt +7 -0
  143. docs/_build/html/_sources/notes/citation.rst.txt +45 -0
  144. docs/_build/html/_sources/notes/definitions.rst.txt +43 -0
  145. docs/_build/html/_sources/notes/faqs.rst.txt +39 -0
  146. docs/_build/html/_sources/notes/modeling/accelerator_energy_latency.rst.txt +72 -0
  147. docs/_build/html/_sources/notes/modeling/component_energy_area.rst.txt +96 -0
  148. docs/_build/html/_sources/notes/modeling/mapping.rst.txt +100 -0
  149. docs/_build/html/_sources/notes/modeling.rst.txt +33 -0
  150. docs/_build/html/_sources/notes/parsing/arithmetic_parsing.rst.txt +136 -0
  151. docs/_build/html/_sources/notes/parsing/setexpressions.rst.txt +63 -0
  152. docs/_build/html/_sources/notes/parsing/yaml_parsing.rst.txt +176 -0
  153. docs/_build/html/_sources/notes/quickstart_and_installation.rst.txt +9 -0
  154. docs/_build/html/_sources/notes/spec/architecture.rst.txt +133 -0
  155. docs/_build/html/_sources/notes/spec/mapping.rst.txt +12 -0
  156. docs/_build/html/_sources/notes/spec/workload.rst.txt +83 -0
  157. docs/_build/html/_sources/notes/spec.rst.txt +36 -0
  158. docs/source/_ext/include_attrs.py +213 -0
  159. docs/source/_ext/include_docstring.py +364 -0
  160. docs/source/_ext/include_functions.py +154 -0
  161. docs/source/_ext/include_notebook.py +131 -0
  162. docs/source/_ext/include_yaml.py +119 -0
  163. docs/source/_ext/inherited_attributes.py +222 -0
  164. docs/source/_ext/paths.py +4 -0
  165. docs/source/conf.py +79 -0
  166. examples/arches/compute_in_memory/_include.yaml +74 -0
  167. examples/arches/compute_in_memory/_include_functions.py +229 -0
  168. examples/arches/compute_in_memory/_load_spec.py +57 -0
  169. examples/arches/compute_in_memory/components/c2c_multiplier.py +181 -0
  170. examples/arches/compute_in_memory/components/dac_c2c_r2r.py +605 -0
  171. examples/arches/compute_in_memory/components/misc.py +195 -0
  172. examples/arches/compute_in_memory/components/util/bit_functions.py +51 -0
  173. examples/arches/compute_in_memory/components/zero_comparator.py +92 -0
  174. examples/arches/compute_in_memory/isaac.yaml +233 -0
  175. examples/arches/compute_in_memory/memory_cells/ecram_demo.yaml +63 -0
  176. examples/arches/compute_in_memory/memory_cells/rram_example.yaml +63 -0
  177. examples/arches/compute_in_memory/memory_cells/rram_isaac_isca_2016.yaml +64 -0
  178. examples/arches/compute_in_memory/memory_cells/rram_neurosim_default.yaml +63 -0
  179. examples/arches/compute_in_memory/memory_cells/rram_raella_isca_2023.yaml +70 -0
  180. examples/arches/compute_in_memory/memory_cells/rram_wan_nature_2022.yaml +63 -0
  181. examples/arches/compute_in_memory/memory_cells/sram_colonnade_jssc_2021.yaml +63 -0
  182. examples/arches/compute_in_memory/memory_cells/sram_example.yaml +63 -0
  183. examples/arches/compute_in_memory/memory_cells/sram_jia_jssc_2020.yaml +63 -0
  184. examples/arches/compute_in_memory/memory_cells/sram_sinangil_jssc_2021.yaml +63 -0
  185. examples/arches/compute_in_memory/memory_cells/sram_wang_vlsi_2022.yaml +63 -0
  186. examples/arches/compute_in_memory/wang_vlsi_2022.yaml +289 -0
  187. examples/arches/eyeriss.yaml +68 -0
  188. examples/arches/fanout_variations/at_glb.yaml +31 -0
  189. examples/arches/fanout_variations/at_glb_with_fanout_node.yaml +34 -0
  190. examples/arches/fanout_variations/at_mac.yaml +31 -0
  191. examples/arches/fanout_variations/at_mac_with_constraints.yaml +38 -0
  192. examples/arches/fanout_variations/at_mac_with_fanout_node.yaml +34 -0
  193. examples/arches/nvdla.yaml +47 -0
  194. examples/arches/simple.yaml +28 -0
  195. examples/arches/tpu_v4i.yaml +67 -0
  196. examples/mappings/unfused_matmuls_to_simple.yaml +33 -0
  197. examples/misc/component_annotated.yaml +33 -0
  198. examples/workloads/gpt3_6.7B.yaml +124 -0
  199. examples/workloads/matmuls.yaml +20 -0
  200. examples/workloads/mobilenet_28.yaml +81 -0
  201. examples/workloads/mobilenet_various_separate.yaml +106 -0
  202. examples/workloads/three_matmuls_annotated.yaml +59 -0
  203. notebooks/.ipynb_checkpoints/fastfusion_arch_study_michael-checkpoint.ipynb +359 -0
  204. notebooks/compute_in_memory/_scripts.py +339 -0
  205. notebooks/compute_in_memory/isaac.guide.ipynb +270 -0
  206. notebooks/compute_in_memory/wang_vlsi_2022.ipynb +602 -0
  207. notebooks/paths.py +4 -0
  208. notebooks/tutorials/.ipynb_checkpoints/1_FFM-checkpoint.ipynb +3110 -0
  209. notebooks/tutorials/FFM.ipynb +3498 -0
  210. notebooks/tutorials/_include.py +48 -0
  211. notebooks/tutorials/component_energy_area.ipynb +363 -0
  212. tests/Q_mapping.yaml +38 -0
  213. tests/__init__.py +0 -0
  214. tests/conv.mapping.yaml +27 -0
  215. tests/conv.workload.yaml +13 -0
  216. tests/conv_sym.mapping.yaml +43 -0
  217. tests/copy.mapping.yaml +35 -0
  218. tests/copy.workload.yaml +15 -0
  219. tests/distribuffers/__init__.py +0 -0
  220. tests/distribuffers/multicast/test_cases.yaml +482 -0
  221. tests/distribuffers/spec/binding/valid_bindings.yaml +97 -0
  222. tests/distribuffers/spec/distributed.yaml +100 -0
  223. tests/distribuffers/spec/logical_arch.yaml +32 -0
  224. tests/distribuffers/spec/physical_arch.yaml +69 -0
  225. tests/distribuffers/test_binding.py +48 -0
  226. tests/frontend/__init__.py +0 -0
  227. tests/frontend/test_mapping_viz.py +52 -0
  228. tests/mapper/__init__.py +0 -0
  229. tests/mapper/configs/conv1d/conv1d.mapping.yaml +31 -0
  230. tests/mapper/configs/conv1d/conv1d.workload.yaml +11 -0
  231. tests/mapper/configs/two_conv1d/two_conv1d.expected.yaml +38 -0
  232. tests/mapper/configs/two_conv1d/two_conv1d.mapping.yaml +54 -0
  233. tests/mapper/configs/two_conv1d/two_conv1d.workload.yaml +19 -0
  234. tests/mapper/test_mapping_to_isl.py +90 -0
  235. tests/mapper/test_spatial_reuse_analysis.py +67 -0
  236. tests/mapper/test_temporal_reuse_analysis.py +56 -0
  237. tests/mapper/util.py +58 -0
  238. tests/matmul.mapping.yaml +29 -0
  239. tests/matmul.workload.yaml +12 -0
  240. tests/matmul_spatial.mapping.yaml +44 -0
  241. tests/mha.renames.yaml +65 -0
  242. tests/mha.workload.yaml +67 -0
  243. tests/mha.yaml +59 -0
  244. tests/mha_full.workload.yaml +67 -0
  245. tests/mobilenet.workload.yaml +35 -0
  246. tests/mobilenet_long.workload.yaml +64 -0
  247. tests/pmappingcache.py +24 -0
  248. tests/processing_stage.arch.yaml +40 -0
  249. tests/snowcat.arch.yaml +36 -0
  250. tests/test_ffm_join_pmappings.py +106 -0
  251. tests/test_ffm_make_pmappings.py +82 -0
  252. tests/test_ffm_make_tile_shapes.py +49 -0
  253. tests/test_mapper.py +100 -0
  254. tests/test_model.py +37 -0
  255. tests/test_plotting.py +72 -0
  256. tests/test_processing_stage.py +46 -0
  257. tests/test_symbolic_model.py +248 -0
  258. tests/test_workload.py +141 -0
@@ -0,0 +1,482 @@
1
+ - occ: "{ noc[xs, ys] -> data[d0, d1] : d0=xs and d1=ys and 0 <= xs < 8 and 0 <= ys < 8 }"
2
+ fill: "{ noc[xd, yd] -> data[d0, d1] : d0=xd and 0 <= d1 < 8 and 0 <= xd < 8 and 0 <= yd < 8 }"
3
+ dims: &2d_spatial
4
+ - type: Spatial
5
+ spatial_dim: 0
6
+ target: 0
7
+ - type: Spatial
8
+ spatial_dim: 1
9
+ target: 0
10
+ dist_func: &2d_manhattan |
11
+ {
12
+ [noc[xd, yd] -> noc[xs, ys]] -> hops[(xd - xs) + (yd - ys)] :
13
+ xd >= xs and yd >= ys;
14
+ [noc[xd, yd] -> noc[xs, ys]] -> hops[-(xd - xs) + -(yd - ys)] :
15
+ xd < xs and yd < ys;
16
+ [noc[xd, yd] -> noc[xs, ys]] -> hops[-(xd - xs) + (yd - ys)] :
17
+ xd < xs and yd >= ys;
18
+ [noc[xd, yd] -> noc[xs, ys]] -> hops[(xd - xs) + -(yd - ys)] :
19
+ xd >= xs and yd < ys
20
+ }
21
+ expected: &equivalent_class_1
22
+ latency: 7
23
+ total_hops: 1344
24
+ multicast_hops: 448
25
+ hypercube_hops: 448
26
+ extent_DOR_hops: 448
27
+ - occ: "{ noc[xs, ys] -> data[d0, d1] : d0=xs and d1=ys and 0 <= xs < 8 and 0 <= ys < 8 }"
28
+ fill: "{ noc[xd, yd] -> data[d0, d1] : 0 <= d0 < 8 and d1=yd and 0 <= xd < 8 and 0 <= yd < 8 }"
29
+ dims: *2d_spatial
30
+ dist_func: *2d_manhattan
31
+ expected: *equivalent_class_1
32
+ - occ: "{ noc[xs, ys] -> data[d0, d1] : d0 = xs and 0 <= d1 < 8 and 0 <= xs < 8 and 0 <= ys < 8 }"
33
+ fill: "{ noc[xd, yd] -> data[d0, d1] : d0 = xd and d1 = yd and 0 <= xd < 8 and 0 <= yd < 8 }"
34
+ dims: *2d_spatial
35
+ dist_func: *2d_manhattan
36
+ expected: &equivalent_class_2
37
+ latency: 0
38
+ total_hops: 0
39
+ multicast_hops: 0
40
+ hypercube_hops: 0
41
+ extent_DOR_hops: 448
42
+ - occ: "{ noc[xs, ys] -> data[d0, d1] : 0 <= d0 < 8 and d1 = ys and 0 <= xs < 8 and 0 <= ys < 8 }"
43
+ fill: "{ noc[xd, yd] -> data[d0, d1] : d0 = xd and d1 = yd and 0 <= xd < 8 and 0 <= yd < 8 }"
44
+ dims: *2d_spatial
45
+ dist_func: *2d_manhattan
46
+ expected: *equivalent_class_2
47
+
48
+ #######################
49
+ # Temporal Test Cases #
50
+ #######################
51
+ ##@section 1x1 dummy case to catch temporal integration issues.
52
+ # Edge to Src
53
+ - occ: &1x1_edge |
54
+ {
55
+ noc[tm, tn, xs, ys] -> A[m, k] :
56
+ 0 <= tm < 1 and 0 <= tn < 1 and
57
+ xs=0 and ys=0 and
58
+ 0 <= m < 1 and 0 <= k < 1
59
+ }
60
+ fill: |
61
+ {
62
+ noc[tm, tn, xd, yd] -> A[m, k] :
63
+ 0 <= tm < 1 and 0 <= tn < 1 and tn = 0 and
64
+ 0 <= xd < 1 and 0 <= yd < 1 and
65
+ m = (1 * tm) + xd and 0 <= k < 1
66
+ }
67
+ dims: &2t-dim_2s-dim
68
+ - type: Temporal
69
+ - type: Temporal
70
+ - type: Spatial
71
+ spatial_dim: 0
72
+ target: 0
73
+ - type: Spatial
74
+ spatial_dim: 1
75
+ target: 0
76
+ dist_func: &2t-dim_2s-dim_manhattan |
77
+ {
78
+ [noc[tm, tn, xd, yd] -> noc[tm, tn, xs, ys]] -> hops[(xd - xs) + (yd - ys)] :
79
+ xd >= xs and yd >= ys;
80
+ [noc[tm, tn, xd, yd] -> noc[tm, tn, xs, ys]] -> hops[-(xd - xs) + -(yd - ys)] :
81
+ xd < xs and yd < ys;
82
+ [noc[tm, tn, xd, yd] -> noc[tm, tn, xs, ys]] -> hops[-(xd - xs) + (yd - ys)] :
83
+ xd < xs and yd >= ys;
84
+ [noc[tm, tn, xd, yd] -> noc[tm, tn, xs, ys]] -> hops[(xd - xs) + -(yd - ys)] :
85
+ xd >= xs and yd < ys
86
+ }
87
+ expected:
88
+ latency: null
89
+ total_hops: null
90
+ multicast_hops: null
91
+ hypercube_hops: 0
92
+ extent_DOR_hops: null
93
+ # Src to Dst
94
+ - occ: |
95
+ {
96
+ noc[tm, tn, tk, xs, ys] -> A[m, k] :
97
+ 0 <= tm < 1 and 0 <= tn < 1 and 0 <= tk < 1 and
98
+ 0 <= xs < 1 and 0 <= ys < 1 and
99
+ m= (1 * tm) + xs and 0 <= k < 1
100
+ }
101
+ fill: &1x1_fill |
102
+ {
103
+ noc[tm, tn, tk, xd, yd] -> A[m, k] :
104
+ 0 <= tm < 1 and 0 <= tn < 1 and 0 <= tk < 1 and
105
+ 0 <= xd < 1 and 0 <= yd < 1 and
106
+ m= (1 * tm) + xd and k=tk
107
+ }
108
+ dims: &3t-dim_2s-dim
109
+ - type: Temporal
110
+ - type: Temporal
111
+ - type: Temporal
112
+ - type: Spatial
113
+ spatial_dim: 0
114
+ target: 0
115
+ - type: Spatial
116
+ spatial_dim: 1
117
+ target: 0
118
+ dist_func: &3t-dim_2s-dim_manhattan |
119
+ {
120
+ [noc[tm, tn, tk, xd, yd] -> noc[tm, tn, tk, xs, ys]] -> hops[(xd - xs) + (yd - ys)] :
121
+ xd >= xs and yd >= ys;
122
+ [noc[tm, tn, tk, xd, yd] -> noc[tm, tn, tk, xs, ys]] -> hops[-(xd - xs) + -(yd - ys)] :
123
+ xd < xs and yd < ys;
124
+ [noc[tm, tn, tk, xd, yd] -> noc[tm, tn, tk, xs, ys]] -> hops[-(xd - xs) + (yd - ys)] :
125
+ xd < xs and yd >= ys;
126
+ [noc[tm, tn, tk, xd, yd] -> noc[tm, tn, tk, xs, ys]] -> hops[(xd - xs) + -(yd - ys)] :
127
+ xd >= xs and yd < ys
128
+ }
129
+ expected:
130
+ latency: null
131
+ total_hops: null
132
+ multicast_hops: null
133
+ hypercube_hops: 0
134
+ extent_DOR_hops: null
135
+ ##@section 8x8 case for a bigger chip.
136
+ ##@brief Full duplication to make sure temporal handling is correct.
137
+ # Edge to Src
138
+ - occ: &8x8_edge |
139
+ {
140
+ noc[tm, tn, xs, ys] -> A[m, k] :
141
+ 0 <= tm < 8 and 0 <= tn < 8 and
142
+ xs=0 and ys=0 and
143
+ 0 <= m < 64 and 0 <= k < 64
144
+ }
145
+ fill: |
146
+ {
147
+ noc[tm, tn, xd, yd] -> A[m, k] :
148
+ 0 <= tm < 8 and 0 <= tn < 8 and tn = 0 and
149
+ 0 <= xd < 8 and 0 <= yd < 8 and
150
+ m = (8 * tm) + xd and 0 <= k < 64
151
+ }
152
+ dims: *2t-dim_2s-dim
153
+ dist_func: *2t-dim_2s-dim_manhattan
154
+ expected:
155
+ latency: null
156
+ total_hops: null
157
+ multicast_hops: null
158
+ hypercube_hops: 143360
159
+ extent_DOR_hops: null
160
+ # Src to Dst
161
+ - occ: |
162
+ {
163
+ noc[tm, tn, tk, xs, ys] -> A[m, k] :
164
+ 0 <= tm < 8 and 0 <= tn < 8 and 0 <= tk < 64 and
165
+ 0 <= xs < 8 and 0 <= ys < 8 and
166
+ m = (8 * tm) + xs and 0 <= k < 64
167
+ }
168
+ fill: &8x8_pe |
169
+ {
170
+ noc[tm, tn, tk, xd, yd] -> A[m, k] :
171
+ 0 <= tm < 8 and 0 <= tn < 8 and 0 <= tk < 64 and
172
+ 0 <= xd < 8 and 0 <= yd < 8 and
173
+ 0 <= m < 64 and m = (8 * tm) + xd and
174
+ 0 <= k < 64 and k=tk
175
+ }
176
+ dims: *3t-dim_2s-dim
177
+ dist_func: *3t-dim_2s-dim_manhattan
178
+ expected:
179
+ latency: null
180
+ total_hops: null
181
+ multicast_hops: null
182
+ hypercube_hops: 0
183
+ extent_DOR_hops: null
184
+ ##@brief No duplication, one time step to ensure spatial handling is correct.
185
+ # Edge to Src, only 1 time step.
186
+ - occ: *8x8_edge
187
+ fill: |
188
+ {
189
+ noc[tm, tn, xd, yd] -> A[m, k] :
190
+ 0 <= tm < 8 and tm = 0 and 0 <= tn < 8 and tn = 0 and
191
+ 0 <= xd < 8 and 0 <= yd < 8 and
192
+ 0 <= m < 64 and m = (8 * tm) + xd and
193
+ 0 <= k < 64 and 0 = (yd - k) % 8
194
+ }
195
+ dims: *2t-dim_2s-dim
196
+ dist_func: *2t-dim_2s-dim_manhattan
197
+ expected:
198
+ latency: null
199
+ total_hops: null
200
+ multicast_hops: null
201
+ hypercube_hops: 9856
202
+ extent_DOR_hops: null
203
+ # Src to Dst, only 1 time step.
204
+ - occ: |
205
+ {
206
+ noc[tm, tn, tk, xd, yd] -> A[m, k] :
207
+ 0 <= tm < 8 and tm = 0 and 0 <= tn < 8 and tn = 0 and 0 <= tk < 64 and
208
+ 0 <= xd < 8 and 0 <= yd < 8 and
209
+ 0 <= m < 64 and m = (8 * tm) + xd and
210
+ 0 <= k < 64 and 0 = (yd - k) % 8
211
+ }
212
+ fill: |
213
+ {
214
+ noc[tm, tn, tk, xd, yd] -> A[m, k] :
215
+ 0 <= tm < 8 and tm = 0 and 0 <= tn < 8 and tn = 0 and 0 <= tk < 64 and tk = 0 and
216
+ 0 <= xd < 8 and 0 <= yd < 8 and
217
+ m = (8 * tm) + xd and k=tk
218
+ }
219
+ dims: *3t-dim_2s-dim
220
+ dist_func: *3t-dim_2s-dim_manhattan
221
+ expected:
222
+ latency: null
223
+ total_hops: null
224
+ multicast_hops: null
225
+ hypercube_hops: 56
226
+ extent_DOR_hops: null
227
+ ##@brief No duplication, one spatial dimension to ensure temporal handling is correct.
228
+ ##@note tn = 0 here because otherwise we would recast at every tn, which is not fundamental to the problem.
229
+ # Edge to Src, only 1 spatial dimension.
230
+ - occ: *8x8_edge
231
+ fill: |
232
+ {
233
+ noc[tm, tn, xd, yd] -> A[m, k] :
234
+ 0 <= tm < 8 and 0 <= tn < 8 and tn = 0 and
235
+ 0 <= xd < 8 and xd = 0 and 0 <= yd < 8 and
236
+ 0 <= m < 64 and m = (8 * tm) + xd and
237
+ 0 <= k < 64 and 0 = (yd - k) % 8
238
+ }
239
+ dims: *2t-dim_2s-dim
240
+ dist_func: *2t-dim_2s-dim_manhattan
241
+ expected:
242
+ latency: null
243
+ total_hops: null
244
+ multicast_hops: null
245
+ hypercube_hops: 1792
246
+ extent_DOR_hops: null
247
+ # Src to Dst, only 1 spatial dimension.
248
+ - occ: |
249
+ {
250
+ noc[tm, tn, tk, xd, yd] -> A[m, k] :
251
+ 0 <= tm < 8 and 0 <= tn < 8 and 0 <= tk < 64 and
252
+ 0 <= xd < 8 and xd = 0 and 0 <= yd < 8 and
253
+ 0 <= m < 64 and m = (8 * tm) + xd and
254
+ 0 <= k < 64 and 0 = (yd - k) % 8
255
+ }
256
+ fill: |
257
+ {
258
+ noc[tm, tn, tk, xd, yd] -> A[m, k] :
259
+ 0 <= tm < 8 and 0 <= tn < 8 and 0 <= tk < 64 and
260
+ 0 <= xd < 8 and xd = 0 and 0 <= yd < 8 and
261
+ m = (8 * tm) + xd and k=tk
262
+ }
263
+ dims: *3t-dim_2s-dim
264
+ dist_func: *3t-dim_2s-dim_manhattan
265
+ expected:
266
+ latency: null
267
+ total_hops: null
268
+ multicast_hops: null
269
+ hypercube_hops: 28672
270
+ extent_DOR_hops: null
271
+ ##@brief No duplication, full chip to ensure temporal and spatial handling is correct.
272
+ # Edge to Src, full chip.
273
+ - occ: *8x8_edge
274
+ fill: |
275
+ {
276
+ noc[tm, tn, xd, yd] -> A[m, k] :
277
+ 0 <= tm < 8 and 0 <= tn < 8 and tn = 0 and
278
+ 0 <= xd < 8 and 0 <= yd < 8 and
279
+ 0 <= m < 64 and m = (8 * tm) + xd and
280
+ 0 <= k < 64 and 0 = (yd - k) % 8
281
+ }
282
+ dims: *2t-dim_2s-dim
283
+ dist_func: *2t-dim_2s-dim_manhattan
284
+ expected:
285
+ latency: null
286
+ total_hops: null
287
+ multicast_hops: null
288
+ hypercube_hops: 78848
289
+ extent_DOR_hops: null
290
+ # Src to Dst, full chip.
291
+ - occ: |
292
+ {
293
+ noc[tm, tn, tk, xd, yd] -> A[m, k] :
294
+ 0 <= tm < 8 and 0 <= tn < 8 and 0 <= tk < 64 and
295
+ 0 <= xd < 8 and 0 <= yd < 8 and
296
+ 0 <= m < 64 and m = (8 * tm) + xd and
297
+ 0 <= k < 64 and 0 = (yd - k) % 8
298
+ }
299
+ fill: *8x8_pe
300
+ dims: *3t-dim_2s-dim
301
+ dist_func: *3t-dim_2s-dim_manhattan
302
+ expected:
303
+ latency: null
304
+ total_hops: null
305
+ multicast_hops: null
306
+ hypercube_hops: 229376
307
+ extent_DOR_hops: null
308
+ # - occ: "{ noc[xs, ys] -> data[d0, d1] : 0 <= d0 < 8 and 0 <= d1 < 8 and (xs=0 or 3<=xs<=4 or xs=7) and (ys=0 or 3<=ys<=4 or ys=7) }"
309
+ # fill: "{ noc[xd, yd] -> data[d0, d1] : d0 = xd and d1 = yd and 0 <= xd < 8 and 0 <= yd < 8 }"
310
+ # dims: *2d_spatial
311
+ # dist_func: *2d_manhattan
312
+ # expected:
313
+ # latency: 2
314
+ # total_hops: 64
315
+ # multicast_hops: null
316
+ # - occ: "{ noc[xs, ys] -> data[d0, d1] : d0 = xs and d1 = ys and 0 <= xs < 8 and 0 <= ys < 8 }"
317
+ # fill: "{ noc[xd, yd] -> data[d0, d1] : 0 <= d0 < 8 and 0 <= d1 < 8 and (xd=0 or 3<=xd<=4 or xd=7) and (yd=0 or 3<=yd<=4 or yd=7) }"
318
+ # dist_func: *2d_manhattan
319
+ # dims: *2d_spatial
320
+ # expected:
321
+ # latency: 14
322
+ # total_hops: 5632
323
+ # multicast_hops: null
324
+ # - occ: |
325
+ # { noc[xs, ys] -> data[d0, d1] :
326
+ # 0 <= d0 < 3 and 0 <= d1 < 3 and
327
+ # 0 <= xs < 9 and 0 <= ys < 9 and xs % 3 = 1 and ys % 3 = 1
328
+ # }
329
+ # fill: "{ noc[xd, yd] -> data[d0, d1] : d0 = xd % 3 and d1 = yd % 3 and 0 <= xd < 9 and 0 <= yd < 9 }"
330
+ # dims: *2d_spatial
331
+ # dist_func: *2d_manhattan
332
+ # expected:
333
+ # latency: 2
334
+ # total_hops: 108
335
+ # multicast_hops: null
336
+ # - occ: "{ noc[xs, ys] -> data[d0, d1] : d0 = xs % 3 and d1 = ys % 3 and 0 <= xs < 9 and 0 <= ys < 9 }"
337
+ # fill: |
338
+ # { noc[xd, yd] -> data[d0, d1] :
339
+ # 0 <= d0 < 3 and 0 <= d1 < 3 and
340
+ # 0 <= xd < 9 and 0 <= yd < 9 and xd % 3 = 1 and yd % 3 = 1
341
+ # }
342
+ # dims: *2d_spatial
343
+ # dist_func: *2d_manhattan
344
+ # expected:
345
+ # latency: 2
346
+ # total_hops: 108
347
+ # - occ: "{ [xs, ys] -> [d0] : d0=xs and 0 <= xs < 8 and ys = 0 }"
348
+ # fill: "{ [xd, yd] -> [d0] : d0=xd and 0 <= xd < 8 and 0 <= yd < 8 }"
349
+ # dims: *2d_spatial
350
+ # dist_func: *2d_manhattan
351
+ # expected:
352
+ # latency: 7
353
+ # total_hops: 224
354
+ # - occ: "{ [xs, ys] -> [d0] : d0=xs and 0 <= xs < 8 and 0 <= ys < 8 }"
355
+ # fill: "{ [xd, yd] -> [d0] : d0=xd and 0 <= xd < 8 and yd = 0 }"
356
+ # dims: *2d_spatial
357
+ # dist_func: *2d_manhattan
358
+ # expected:
359
+ # latency: 0
360
+ # total_hops: 0
361
+ # - occ: |
362
+ # { [xs, ys, z1s, z2s] -> [d0, d1, d2, d3] :
363
+ # d0 = xs and d1 = ys and d2 = z1s and d3 = z2s and
364
+ # 0 <= xs < 8 and 0 <= ys < 8 and 0 <= z1s < 8 and 0 <= z2s < 8
365
+ # }
366
+ # fill: |
367
+ # { [xd, yd, z1d, z2d] -> [d0, d1, d2, d3] :
368
+ # 0 <= d0 < 8 and 0 <= d1 < 8 and 0 <= d2 < 8 and 0 <= d3 < 8 and
369
+ # xd=0 and yd=0 and z1d=0 and z2d=0
370
+ # }
371
+ # dims: &4d_spatial
372
+ # - Spatial
373
+ # - 0
374
+ # - 0
375
+ # - Spatial
376
+ # - 1
377
+ # - 0
378
+ # - Spatial
379
+ # - 2
380
+ # - 0
381
+ # - Spatial
382
+ # - 3
383
+ # - 0
384
+ # dist_func: &4d_manhattan |
385
+ # {
386
+ # [[xd, yd, z1d, z2d] -> [xs, ys, z1s, z2s]] ->
387
+ # [(xd - xs) + (yd - ys) + (z1d - z1s) + (z2d - z2s)] :
388
+ # (xd >= xs) and (yd >= ys) and (z1d >= z1s) and (z2d >= z2s);
389
+
390
+ # [[xd, yd, z1d, z2d] -> [xs, ys, z1s, z2s]] ->
391
+ # [-(xd - xs) + (yd - ys) + (z1d - z1s) + (z2d - z2s)] :
392
+ # (xd < xs) and (yd >= ys) and (z1d >= z1s) and (z2d >= z2s);
393
+ # [[xd, yd, z1d, z2d] -> [xs, ys, z1s, z2s]] ->
394
+ # [-(xd - xs) + -(yd - ys) + (z1d - z1s) + (z2d - z2s)] :
395
+ # (xd < xs) and (yd < ys) and (z1d >= z1s) and (z2d >= z2s);
396
+ # [[xd, yd, z1d, z2d] -> [xs, ys, z1s, z2s]] ->
397
+ # [-(xd - xs) + -(yd - ys) + -(z1d - z1s) + (z2d - z2s)] :
398
+ # (xd < xs) and (yd < ys) and (z1d < z1s) and (z2d >= z2s);
399
+ # [[xd, yd, z1d, z2d] -> [xs, ys, z1s, z2s]] ->
400
+ # [-(xd - xs) + -(yd - ys) + -(z1d - z1s) + -(z2d - z2s)] :
401
+ # (xd < xs) and (yd < ys) and (z1d < z1s) and (z2d < z2s);
402
+
403
+ # [[xd, yd, z1d, z2d] -> [xs, ys, z1s, z2s]] ->
404
+ # [(xd - xs) + -(yd - ys) + (z1d - z1s) + (z2d - z2s)] :
405
+ # (xd >= xs) and (yd < ys) and (z1d >= z1s) and (z2d >= z2s);
406
+ # [[xd, yd, z1d, z2d] -> [xs, ys, z1s, z2s]] ->
407
+ # [(xd - xs) + -(yd - ys) + -(z1d - z1s) + (z2d - z2s)] :
408
+ # (xd >= xs) and (yd < ys) and (z1d < z1s) and (z2d >= z2s);
409
+ # [[xd, yd, z1d, z2d] -> [xs, ys, z1s, z2s]] ->
410
+ # [(xd - xs) + -(yd - ys) + -(z1d - z1s) + -(z2d - z2s)] :
411
+ # (xd >= xs) and (yd < ys) and (z1d < z1s) and (z2d < z2s);
412
+
413
+ # [[xd, yd, z1d, z2d] -> [xs, ys, z1s, z2s]] ->
414
+ # [(xd - xs) + (yd - ys) + -(z1d - z1s) + (z2d - z2s)] :
415
+ # (xd >= xs) and (yd >= ys) and (z1d < z1s) and (z2d >= z2s);
416
+ # [[xd, yd, z1d, z2d] -> [xs, ys, z1s, z2s]] ->
417
+ # [(xd - xs) + (yd - ys) + -(z1d - z1s) + -(z2d - z2s)] :
418
+ # (xd >= xs) and (yd >= ys) and (z1d < z1s) and (z2d < z2s);
419
+
420
+ # [[xd, yd, z1d, z2d] -> [xs, ys, z1s, z2s]] ->
421
+ # [(xd - xs) + (yd - ys) + (z1d - z1s) + -(z2d - z2s)] :
422
+ # (xd >= xs) and (yd >= ys) and (z1d >= z1s) and (z2d < z2s);
423
+
424
+ # [[xd, yd, z1d, z2d] -> [xs, ys, z1s, z2s]] ->
425
+ # [-(xd - xs) + (yd - ys) + -(z1d - z1s) + -(z2d - z2s)] :
426
+ # (xd < xs) and (yd >= ys) and (z1d < z1s) and (z2d < z2s);
427
+ # [[xd, yd, z1d, z2d] -> [xs, ys, z1s, z2s]] ->
428
+ # [-(xd - xs) + -(yd - ys) + (z1d - z1s) + -(z2d - z2s)] :
429
+ # (xd < xs) and (yd < ys) and (z1d >= z1s) and (z2d < z2s);
430
+ # [[xd, yd, z1d, z2d] -> [xs, ys, z1s, z2s]] ->
431
+ # [-(xd - xs) + (yd - ys) + (z1d - z1s) + -(z2d - z2s)] :
432
+ # (xd < xs) and (yd >= ys) and (z1d >= z1s) and (z2d < z2s);
433
+
434
+ # [[xd, yd, z1d, z2d] -> [xs, ys, z1s, z2s]] ->
435
+ # [-(xd - xs) + (yd - ys) + -(z1d - z1s) + (z2d - z2s)] :
436
+ # (xd < xs) and (yd >= ys) and (z1d < z1s) and (z2d >= z2s);
437
+ # [[xd, yd, z1d, z2d] -> [xs, ys, z1s, z2s]] ->
438
+ # [(xd - xs) + -(yd - ys) + (z1d - z1s) + -(z2d - z2s)] :
439
+ # (xd >= xs) and (yd < ys) and (z1d >= z1s) and (z2d < z2s)
440
+ # }
441
+ # expected:
442
+ # latency: 28
443
+ # total_hops: null
444
+ # - occ: |
445
+ # { [xs, ys, z1s, z2s] -> [d0, d1, d2, d3] :
446
+ # 0 <= d0 < 8 and 0 <= d1 < 8 and 0 <= d2 < 8 and 0 <= d3 < 8 and
447
+ # xs=0 and ys=0 and z1s=0 and z2s=0
448
+ # }
449
+ # fill: |
450
+ # { [xd, yd, z1d, z2d] -> [d0, d1, d2, d3] :
451
+ # d0 = xd and d1 = yd and d2 = z1d and d3 = z2d and
452
+ # 0 <= xd < 8 and 0 <= yd < 8 and 0 <= z1d < 8 and 0 <= z2d < 8
453
+ # }
454
+ # dims: *4d_spatial
455
+ # dist_func: *4d_manhattan
456
+ # expected:
457
+ # latency: 28
458
+ # total_hops: null
459
+ # - occ: "{ [xs] -> [d0] : 0 <= d0 < 8 and xs = 0 }"
460
+ # fill: "{ [xd] -> [d0] : d0 = xd and 0 <= xd < 8 }"
461
+ # dist_func: &ring_dist_size_8 |
462
+ # {
463
+ # [[xd] -> [xs]] -> [(xd-xs) % 8] :
464
+ # (xd-xs)%8 <= (xs-xd)%8;
465
+ # [[xd] -> [xs]] -> [(xs-xd) % 8] :
466
+ # (xd-xs)%8 > (xs-xd)%8
467
+ # }
468
+ # expected:
469
+ # latency: 4
470
+ # total_hops: 16
471
+ # - occ: "{ [xs] -> [d0] : 0 <= xs < 8 and d0 = xs }"
472
+ # fill: "{ [xd] -> [d0] : 0 <= d0 < 8 and xd = 0 }"
473
+ # dist_func: *ring_dist_size_8
474
+ # expected:
475
+ # latency: 4
476
+ # total_hops: 16
477
+ # - occ: "{ [xs] -> [d0] : xs <= d0 <= xs + 1 and xs % 2 = 0}"
478
+ # fill: "{ [xd] -> [d0] : d0 = xd and 0 <= xd < 8 }"
479
+ # dist_func: *ring_dist_size_8
480
+ # expected:
481
+ # latency: 1
482
+ # total_hops: 4
@@ -0,0 +1,97 @@
1
+ - binding:
2
+ nodes:
3
+ - logical:
4
+ name: PE
5
+ l_dims: [i]
6
+ physical:
7
+ name: PE
8
+ p_dims: [x, y]
9
+ relations:
10
+ tensorA: i = x + y * 2 # Dimension-major compression of the physical dims (x, y) into the logical dim i. It is bijective provided 0 <= x < 2.
11
+ tensorB: i = x + y * 2 # Dimension-major compression of the physical dims (x, y) into the logical dim i. It is bijective provided 0 <= x < 2.
12
+ - logical:
13
+ name: Scratchpad
14
+ l_dims: [x, y]
15
+ physical:
16
+ name: GLB
17
+ p_dims: [a, b]
18
+ relations:
19
+ tensorA: x = a and y = b
20
+ tensorB: x = b and y = a
21
+ solution:
22
+ nodes:
23
+ -
24
+ tensorA: |
25
+ {
26
+ [
27
+ tensorA_ranks[c, h, w, p, q, r, s] ->
28
+ l_PE_dims[i]
29
+ ] ->
30
+ p_PE_dims[x, y] :
31
+ i = x + 2y
32
+ }
33
+ tensorB: |
34
+ {
35
+ [
36
+ tensorB_ranks[c, h, w, p, q, r, s] ->
37
+ l_PE_dims[i]
38
+ ] ->
39
+ p_PE_dims[x, y] :
40
+ i = x + (y * 2)
41
+ }
42
+ -
43
+ tensorA: |
44
+ {
45
+ [
46
+ tensorA_ranks[c, h, w, p, q, r, s] ->
47
+ l_Scratchpad_dims[x, y]
48
+ ] ->
49
+ p_GLB_dims[ a, b ] :
50
+ x = a and y = b
51
+ }
52
+ tensorB: |
53
+ {
54
+ [
55
+ tensorB_ranks[c, h, w, p, q, r, s] ->
56
+ l_Scratchpad_dims[x, y]
57
+ ] ->
58
+ p_GLB_dims[ a, b ] :
59
+ x = b and y = a
60
+ }
61
+
62
+ - binding:
63
+ nodes:
64
+ - logical:
65
+ name: DRAM
66
+ l_dims: [i]
67
+ physical:
68
+ name: DRAM
69
+ p_dims: [i]
70
+ relation: # Compression relation for when there are fewer DRAM chips than planned.
71
+ weights: i = i // 2
72
+ inputs: i = i // 2
73
+ outputs: i = i // 2
74
+ - logical:
75
+ name: Scratchpad
76
+ l_dims: [i]
77
+ physical:
78
+ name: GLB
79
+ p_dims: [x, y, z]
80
+ relation: # weight stationary relation
81
+ weights: c = x and h = y and w = z
82
+ inputs: i=x and i=y and i=z
83
+ outputs: i=x and i=y and i=z
84
+ - logical:
85
+ name: PE_Buffer
86
+ l_dims: [i]
87
+ physical:
88
+ name: GLB
89
+ p_dims: [x, y, z]
90
+ relation: # Some weird bypass shenanigans.
91
+ weights:
92
+ inputs:
93
+ outputs:
94
+
95
+
96
+
97
+ -
@@ -0,0 +1,100 @@
1
+ networks:
2
+ - name: L2
3
+ topology: Mesh
4
+ dims:
5
+ - x
6
+ - y
7
+ constraints:
8
+ - 0 <= x < 2
9
+ - 0 <= y < 3
10
+ affine: |
11
+ {
12
+ [L2[x, y] -> L2[x', y']] -> hops[(x - x') + (y - y')] :
13
+ x >= x' and y >= y';
14
+ [L2[x, y] -> L2[x', y']] -> hops[-(x - x') + -(y - y')] :
15
+ x < x' and y < y';
16
+ [L2[x, y] -> L2[x', y']] -> hops[-(x - x') + (y - y')] :
17
+ x < x' and y >= y';
18
+ [L2[x, y] -> L2[x', y']] -> hops[(x - x') + -(y - y')] :
19
+ x >= x' and y < y'
20
+ }
21
+ domain_restriction: |
22
+ {
23
+ [L2[x, y] -> L2[x', y']] -> hops[(x - x') + (y - y')] :
24
+ x >= x' and y >= y' and 0 <= x < 2 and 0 <= x' < 2 and 0 <= y < 3 and 0 <= y' < 3;
25
+ [L2[x, y] -> L2[x', y']] -> hops[-(x - x') + (y - y')] :
26
+ x < x' and y >= y' and 0 <= x < 2 and 0 <= x' < 2 and 0 <= y < 3 and 0 <= y' < 3;
27
+ [L2[x, y] -> L2[x', y']] -> hops[(x - x') + -(y - y')] :
28
+ x >= x' and y < y' and 0 <= x < 2 and 0 <= x' < 2 and 0 <= y < 3 and 0 <= y' < 3;
29
+ [L2[x, y] -> L2[x', y']] -> hops[-(x - x') + -(y - y')] :
30
+ x < x' and y < y' and 0 <= x < 2 and 0 <= x' < 2 and 0 <= y < 3 and 0 <= y' < 3
31
+ }
32
+ - name: L1
33
+ topology: Mesh
34
+ dims:
35
+ - x
36
+ - y
37
+ - z
38
+ constraints:
39
+ - 0 <= x < 2027
40
+ - 0 <= y < 2029
41
+ - 0 <= z < 17
42
+ affine: |
43
+ {
44
+ [L1[x, y, z] -> L1[x', y', z']] -> hops[ (x - x') + (y - y') + (z - z') ] :
45
+ x >= x' and y >= y' and z >= z';
46
+ [L1[x, y, z] -> L1[x', y', z']] -> hops[ (x - x') + (y - y') - (z - z') ] :
47
+ x >= x' and y >= y' and z < z';
48
+ [L1[x, y, z] -> L1[x', y', z']] -> hops[ (x - x') - (y - y') + (z - z') ] :
49
+ x >= x' and y < y' and z >= z';
50
+ [L1[x, y, z] -> L1[x', y', z']] -> hops[ -(x - x') + (y - y') + (z - z') ] :
51
+ x < x' and y >= y' and z >= z';
52
+ [L1[x, y, z] -> L1[x', y', z']] -> hops[ -(x - x') - (y - y') + (z - z') ] :
53
+ x < x' and y < y' and z >= z';
54
+ [L1[x, y, z] -> L1[x', y', z']] -> hops[ (x - x') - (y - y') - (z - z') ] :
55
+ x >= x' and y < y' and z < z';
56
+ [L1[x, y, z] -> L1[x', y', z']] -> hops[ -(x - x') + (y - y') - (z - z') ] :
57
+ x < x' and y >= y' and z < z';
58
+ [L1[x, y, z] -> L1[x', y', z']] -> hops[ -(x - x') - (y - y') - (z - z') ] :
59
+ x < x' and y < y' and z < z'
60
+ }
61
+ domain_restriction: |
62
+ {
63
+ [L1[x, y, z] -> L1[x', y', z']] -> hops[ (x - x') + (y - y') + (z - z') ] :
64
+ x >= x' and y >= y' and z >= z' and
65
+ 0 <= x < 2027 and 0 <= y < 2029 and 0 <= z < 17 and
66
+ 0 <= x' < 2027 and 0 <= y' < 2029 and 0 <= z' < 17;
67
+ [L1[x, y, z] -> L1[x', y', z']] -> hops[ (x - x') + (y - y') - (z - z') ] :
68
+ x >= x' and y >= y' and z < z' and
69
+ 0 <= x < 2027 and 0 <= y < 2029 and 0 <= z < 17 and
70
+ 0 <= x' < 2027 and 0 <= y' < 2029 and 0 <= z' < 17;
71
+ [L1[x, y, z] -> L1[x', y', z']] -> hops[ (x - x') - (y - y') + (z - z') ] :
72
+ x >= x' and y < y' and z >= z' and
73
+ 0 <= x < 2027 and 0 <= y < 2029 and 0 <= z < 17 and
74
+ 0 <= x' < 2027 and 0 <= y' < 2029 and 0 <= z' < 17;
75
+ [L1[x, y, z] -> L1[x', y', z']] -> hops[ -(x - x') + (y - y') + (z - z') ] :
76
+ x < x' and y >= y' and z >= z' and
77
+ 0 <= x < 2027 and 0 <= y < 2029 and 0 <= z < 17 and
78
+ 0 <= x' < 2027 and 0 <= y' < 2029 and 0 <= z' < 17;
79
+ [L1[x, y, z] -> L1[x', y', z']] -> hops[ -(x - x') - (y - y') + (z - z') ] :
80
+ x < x' and y < y' and z >= z' and
81
+ 0 <= x < 2027 and 0 <= y < 2029 and 0 <= z < 17 and
82
+ 0 <= x' < 2027 and 0 <= y' < 2029 and 0 <= z' < 17;
83
+ [L1[x, y, z] -> L1[x', y', z']] -> hops[ (x - x') - (y - y') - (z - z') ] :
84
+ x >= x' and y < y' and z < z' and
85
+ 0 <= x < 2027 and 0 <= y < 2029 and 0 <= z < 17 and
86
+ 0 <= x' < 2027 and 0 <= y' < 2029 and 0 <= z' < 17;
87
+ [L1[x, y, z] -> L1[x', y', z']] -> hops[ -(x - x') + (y - y') - (z - z') ] :
88
+ x < x' and y >= y' and z < z' and
89
+ 0 <= x < 2027 and 0 <= y < 2029 and 0 <= z < 17 and
90
+ 0 <= x' < 2027 and 0 <= y' < 2029 and 0 <= z' < 17;
91
+ [L1[x, y, z] -> L1[x', y', z']] -> hops[ -(x - x') - (y - y') - (z - z') ] :
92
+ x < x' and y < y' and z < z' and
93
+ 0 <= x < 2027 and 0 <= y < 2029 and 0 <= z < 17 and
94
+ 0 <= x' < 2027 and 0 <= y' < 2029 and 0 <= z' < 17
95
+ }
96
+ placement:
97
+ - level: DRAM
98
+ placement: "DRAM[i] -> L2[x, y] : i = x and y = 0"
99
+ - level: MAC
100
+ placement: "MAC[i] -> L1[x, y, z] : i = x and y = 0 and z = 0"