accelforge-0.0.1-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of accelforge may be problematic.

Files changed (258):
  1. accelforge/__init__.py +21 -0
  2. accelforge/_accelerated_imports.py +16 -0
  3. accelforge/_deprecate/_simanneal/evalmapping.py +271 -0
  4. accelforge/_deprecate/_simanneal/mapspaceglobals.py +298 -0
  5. accelforge/_deprecate/_simanneal/simanneal.py +666 -0
  6. accelforge/_deprecate/_simanneal/tracking.py +105 -0
  7. accelforge/_deprecate/_simanneal/wrappers.py +218 -0
  8. accelforge/_deprecate/_simanneal2/__init__.py +7 -0
  9. accelforge/_deprecate/_simanneal2/simanneal.py +493 -0
  10. accelforge/_deprecate/_simanneal2/tracking.py +116 -0
  11. accelforge/_deprecate/compatibility_util.py +181 -0
  12. accelforge/_deprecate/layerdeduplication/__init__.py +2 -0
  13. accelforge/_deprecate/layerdeduplication/group_similar_einsums.py +160 -0
  14. accelforge/_deprecate/layerdeduplication/grouped_einsums.py +84 -0
  15. accelforge/_deprecate/mapping_filter_tags/__init__.py +2 -0
  16. accelforge/_deprecate/mapping_filter_tags/ffmt.py +212 -0
  17. accelforge/_deprecate/mapping_filter_tags/onesplit.py +24 -0
  18. accelforge/_deprecate/mapping_filter_tags/util.py +24 -0
  19. accelforge/_deprecate/tags.py +69 -0
  20. accelforge/_deprecate/viz/__init__.py +0 -0
  21. accelforge/_deprecate/viz/interactive.py +159 -0
  22. accelforge/_deprecate/viz/reservationtree.py +307 -0
  23. accelforge/_deprecate/viz/ski_slope.py +88 -0
  24. accelforge/_version.py +15 -0
  25. accelforge/examples.py +39 -0
  26. accelforge/frontend/__init__.py +10 -0
  27. accelforge/frontend/_binding.py +129 -0
  28. accelforge/frontend/_workload_isl/__init__.py +2 -0
  29. accelforge/frontend/_workload_isl/_isl.py +149 -0
  30. accelforge/frontend/_workload_isl/_symbolic.py +141 -0
  31. accelforge/frontend/arch copy.py +1544 -0
  32. accelforge/frontend/arch.py +1642 -0
  33. accelforge/frontend/config.py +63 -0
  34. accelforge/frontend/mapper/__init__.py +5 -0
  35. accelforge/frontend/mapper/ffm.py +126 -0
  36. accelforge/frontend/mapper/mapper.py +7 -0
  37. accelforge/frontend/mapper/metrics.py +30 -0
  38. accelforge/frontend/mapping/__init__.py +1 -0
  39. accelforge/frontend/mapping/mapping.py +1736 -0
  40. accelforge/frontend/model.py +14 -0
  41. accelforge/frontend/renames.py +150 -0
  42. accelforge/frontend/spec copy.py +230 -0
  43. accelforge/frontend/spec.py +301 -0
  44. accelforge/frontend/variables.py +12 -0
  45. accelforge/frontend/workload.py +952 -0
  46. accelforge/mapper/FFM/__init__.py +9 -0
  47. accelforge/mapper/FFM/_join_pmappings/__init__.py +0 -0
  48. accelforge/mapper/FFM/_join_pmappings/compatibility.py +653 -0
  49. accelforge/mapper/FFM/_join_pmappings/compress_pmappings.py +140 -0
  50. accelforge/mapper/FFM/_join_pmappings/join_pmappings.py +703 -0
  51. accelforge/mapper/FFM/_join_pmappings/pmapping_dataframe.py +901 -0
  52. accelforge/mapper/FFM/_join_pmappings/pmapping_group.py +337 -0
  53. accelforge/mapper/FFM/_make_pmappings/contraints/__init__.py +0 -0
  54. accelforge/mapper/FFM/_make_pmappings/contraints/constraints.py +360 -0
  55. accelforge/mapper/FFM/_make_pmappings/make_pmapping_templates/__init__.py +1 -0
  56. accelforge/mapper/FFM/_make_pmappings/make_pmapping_templates/make_loops.py +373 -0
  57. accelforge/mapper/FFM/_make_pmappings/make_pmapping_templates/make_pmapping_templates.py +463 -0
  58. accelforge/mapper/FFM/_make_pmappings/make_pmapping_templates/make_reservations.py +95 -0
  59. accelforge/mapper/FFM/_make_pmappings/make_pmapping_templates/make_storage_order.py +382 -0
  60. accelforge/mapper/FFM/_make_pmappings/make_pmapping_templates/make_storages.py +155 -0
  61. accelforge/mapper/FFM/_make_pmappings/make_pmappings.py +411 -0
  62. accelforge/mapper/FFM/_make_pmappings/make_pmappings_from_templates/__init__.py +1 -0
  63. accelforge/mapper/FFM/_make_pmappings/make_pmappings_from_templates/make_pmappings_from_templates.py +407 -0
  64. accelforge/mapper/FFM/_make_pmappings/make_pmappings_from_templates/make_tile_shapes.py +1681 -0
  65. accelforge/mapper/FFM/_make_pmappings/make_pmappings_from_templates/run_model.py +170 -0
  66. accelforge/mapper/FFM/_make_pmappings/make_pmappings_from_templates/symbol_relations.py +174 -0
  67. accelforge/mapper/FFM/_make_pmappings/pmapper_job.py +282 -0
  68. accelforge/mapper/FFM/_pareto_df/df_convention.py +273 -0
  69. accelforge/mapper/FFM/_pareto_df/pareto copy.py +836 -0
  70. accelforge/mapper/FFM/_pareto_df/pareto.py +508 -0
  71. accelforge/mapper/FFM/data.py +61 -0
  72. accelforge/mapper/FFM/main copy.py +236 -0
  73. accelforge/mapper/FFM/main.py +208 -0
  74. accelforge/mapper/FFM/mappings.py +510 -0
  75. accelforge/mapper/FFM/pmappings.py +310 -0
  76. accelforge/mapper/__init__.py +4 -0
  77. accelforge/mapper.py +0 -0
  78. accelforge/model/__init__.py +1 -0
  79. accelforge/model/_looptree/__init__.py +0 -0
  80. accelforge/model/_looptree/accesses.py +335 -0
  81. accelforge/model/_looptree/capacity/__init__.py +1 -0
  82. accelforge/model/_looptree/capacity/aggregators.py +36 -0
  83. accelforge/model/_looptree/capacity/capacity.py +47 -0
  84. accelforge/model/_looptree/energy.py +150 -0
  85. accelforge/model/_looptree/equivalent_ranks.py +29 -0
  86. accelforge/model/_looptree/latency/__init__.py +1 -0
  87. accelforge/model/_looptree/latency/latency.py +98 -0
  88. accelforge/model/_looptree/latency/memory.py +120 -0
  89. accelforge/model/_looptree/latency/processors.py +92 -0
  90. accelforge/model/_looptree/mapping_utilities.py +71 -0
  91. accelforge/model/_looptree/reuse/__init__.py +4 -0
  92. accelforge/model/_looptree/reuse/isl/__init__.py +1 -0
  93. accelforge/model/_looptree/reuse/isl/des.py +59 -0
  94. accelforge/model/_looptree/reuse/isl/isl_functions.py +374 -0
  95. accelforge/model/_looptree/reuse/isl/mapping_to_isl/__init__.py +4 -0
  96. accelforge/model/_looptree/reuse/isl/mapping_to_isl/analyze_mapping.py +297 -0
  97. accelforge/model/_looptree/reuse/isl/mapping_to_isl/skews_from_mapping.py +236 -0
  98. accelforge/model/_looptree/reuse/isl/mapping_to_isl/tiling.py +685 -0
  99. accelforge/model/_looptree/reuse/isl/mapping_to_isl/types.py +188 -0
  100. accelforge/model/_looptree/reuse/isl/spatial.py +260 -0
  101. accelforge/model/_looptree/reuse/isl/temporal.py +182 -0
  102. accelforge/model/_looptree/reuse/symbolic/__init__.py +1 -0
  103. accelforge/model/_looptree/reuse/symbolic/symbolic copy 2.py +1346 -0
  104. accelforge/model/_looptree/reuse/symbolic/symbolic copy.py +1408 -0
  105. accelforge/model/_looptree/reuse/symbolic/symbolic.py +1396 -0
  106. accelforge/model/_looptree/run.py +122 -0
  107. accelforge/model/_looptree/types.py +26 -0
  108. accelforge/model/_looptree/visualization/__init__.py +0 -0
  109. accelforge/model/_looptree/visualization/occupancy.py +11 -0
  110. accelforge/model/main.py +222 -0
  111. accelforge/plotting/__init__.py +2 -0
  112. accelforge/plotting/mappings.py +219 -0
  113. accelforge/plotting/specs.py +57 -0
  114. accelforge/util/__init__.py +4 -0
  115. accelforge/util/_base_analysis_types.py +24 -0
  116. accelforge/util/_basetypes.py +1089 -0
  117. accelforge/util/_frozenset.py +36 -0
  118. accelforge/util/_isl.py +29 -0
  119. accelforge/util/_itertools.py +14 -0
  120. accelforge/util/_mathfuncs.py +57 -0
  121. accelforge/util/_parse_expressions.py +339 -0
  122. accelforge/util/_picklecache.py +32 -0
  123. accelforge/util/_setexpressions.py +268 -0
  124. accelforge/util/_sympy/__init__.py +0 -0
  125. accelforge/util/_sympy/broadcast_max.py +18 -0
  126. accelforge/util/_visualization.py +112 -0
  127. accelforge/util/_yaml.py +579 -0
  128. accelforge/util/parallel.py +193 -0
  129. accelforge-0.0.1.dist-info/METADATA +64 -0
  130. accelforge-0.0.1.dist-info/RECORD +258 -0
  131. accelforge-0.0.1.dist-info/WHEEL +5 -0
  132. accelforge-0.0.1.dist-info/licenses/LICENSE +19 -0
  133. accelforge-0.0.1.dist-info/top_level.txt +5 -0
  134. docs/_build/html/_sources/fastfusion.frontend.mapper.rst.txt +37 -0
  135. docs/_build/html/_sources/fastfusion.frontend.rst.txt +70 -0
  136. docs/_build/html/_sources/fastfusion.frontend.workload.rst.txt +21 -0
  137. docs/_build/html/_sources/fastfusion.mapper.FFM.rst.txt +37 -0
  138. docs/_build/html/_sources/fastfusion.mapper.rst.txt +18 -0
  139. docs/_build/html/_sources/fastfusion.rst.txt +20 -0
  140. docs/_build/html/_sources/fastfusion.util.rst.txt +21 -0
  141. docs/_build/html/_sources/index.rst.txt +87 -0
  142. docs/_build/html/_sources/modules.rst.txt +7 -0
  143. docs/_build/html/_sources/notes/citation.rst.txt +45 -0
  144. docs/_build/html/_sources/notes/definitions.rst.txt +43 -0
  145. docs/_build/html/_sources/notes/faqs.rst.txt +39 -0
  146. docs/_build/html/_sources/notes/modeling/accelerator_energy_latency.rst.txt +72 -0
  147. docs/_build/html/_sources/notes/modeling/component_energy_area.rst.txt +96 -0
  148. docs/_build/html/_sources/notes/modeling/mapping.rst.txt +100 -0
  149. docs/_build/html/_sources/notes/modeling.rst.txt +33 -0
  150. docs/_build/html/_sources/notes/parsing/arithmetic_parsing.rst.txt +136 -0
  151. docs/_build/html/_sources/notes/parsing/setexpressions.rst.txt +63 -0
  152. docs/_build/html/_sources/notes/parsing/yaml_parsing.rst.txt +176 -0
  153. docs/_build/html/_sources/notes/quickstart_and_installation.rst.txt +9 -0
  154. docs/_build/html/_sources/notes/spec/architecture.rst.txt +133 -0
  155. docs/_build/html/_sources/notes/spec/mapping.rst.txt +12 -0
  156. docs/_build/html/_sources/notes/spec/workload.rst.txt +83 -0
  157. docs/_build/html/_sources/notes/spec.rst.txt +36 -0
  158. docs/source/_ext/include_attrs.py +213 -0
  159. docs/source/_ext/include_docstring.py +364 -0
  160. docs/source/_ext/include_functions.py +154 -0
  161. docs/source/_ext/include_notebook.py +131 -0
  162. docs/source/_ext/include_yaml.py +119 -0
  163. docs/source/_ext/inherited_attributes.py +222 -0
  164. docs/source/_ext/paths.py +4 -0
  165. docs/source/conf.py +79 -0
  166. examples/arches/compute_in_memory/_include.yaml +74 -0
  167. examples/arches/compute_in_memory/_include_functions.py +229 -0
  168. examples/arches/compute_in_memory/_load_spec.py +57 -0
  169. examples/arches/compute_in_memory/components/c2c_multiplier.py +181 -0
  170. examples/arches/compute_in_memory/components/dac_c2c_r2r.py +605 -0
  171. examples/arches/compute_in_memory/components/misc.py +195 -0
  172. examples/arches/compute_in_memory/components/util/bit_functions.py +51 -0
  173. examples/arches/compute_in_memory/components/zero_comparator.py +92 -0
  174. examples/arches/compute_in_memory/isaac.yaml +233 -0
  175. examples/arches/compute_in_memory/memory_cells/ecram_demo.yaml +63 -0
  176. examples/arches/compute_in_memory/memory_cells/rram_example.yaml +63 -0
  177. examples/arches/compute_in_memory/memory_cells/rram_isaac_isca_2016.yaml +64 -0
  178. examples/arches/compute_in_memory/memory_cells/rram_neurosim_default.yaml +63 -0
  179. examples/arches/compute_in_memory/memory_cells/rram_raella_isca_2023.yaml +70 -0
  180. examples/arches/compute_in_memory/memory_cells/rram_wan_nature_2022.yaml +63 -0
  181. examples/arches/compute_in_memory/memory_cells/sram_colonnade_jssc_2021.yaml +63 -0
  182. examples/arches/compute_in_memory/memory_cells/sram_example.yaml +63 -0
  183. examples/arches/compute_in_memory/memory_cells/sram_jia_jssc_2020.yaml +63 -0
  184. examples/arches/compute_in_memory/memory_cells/sram_sinangil_jssc_2021.yaml +63 -0
  185. examples/arches/compute_in_memory/memory_cells/sram_wang_vlsi_2022.yaml +63 -0
  186. examples/arches/compute_in_memory/wang_vlsi_2022.yaml +289 -0
  187. examples/arches/eyeriss.yaml +68 -0
  188. examples/arches/fanout_variations/at_glb.yaml +31 -0
  189. examples/arches/fanout_variations/at_glb_with_fanout_node.yaml +34 -0
  190. examples/arches/fanout_variations/at_mac.yaml +31 -0
  191. examples/arches/fanout_variations/at_mac_with_constraints.yaml +38 -0
  192. examples/arches/fanout_variations/at_mac_with_fanout_node.yaml +34 -0
  193. examples/arches/nvdla.yaml +47 -0
  194. examples/arches/simple.yaml +28 -0
  195. examples/arches/tpu_v4i.yaml +67 -0
  196. examples/mappings/unfused_matmuls_to_simple.yaml +33 -0
  197. examples/misc/component_annotated.yaml +33 -0
  198. examples/workloads/gpt3_6.7B.yaml +124 -0
  199. examples/workloads/matmuls.yaml +20 -0
  200. examples/workloads/mobilenet_28.yaml +81 -0
  201. examples/workloads/mobilenet_various_separate.yaml +106 -0
  202. examples/workloads/three_matmuls_annotated.yaml +59 -0
  203. notebooks/.ipynb_checkpoints/fastfusion_arch_study_michael-checkpoint.ipynb +359 -0
  204. notebooks/compute_in_memory/_scripts.py +339 -0
  205. notebooks/compute_in_memory/isaac.guide.ipynb +270 -0
  206. notebooks/compute_in_memory/wang_vlsi_2022.ipynb +602 -0
  207. notebooks/paths.py +4 -0
  208. notebooks/tutorials/.ipynb_checkpoints/1_FFM-checkpoint.ipynb +3110 -0
  209. notebooks/tutorials/FFM.ipynb +3498 -0
  210. notebooks/tutorials/_include.py +48 -0
  211. notebooks/tutorials/component_energy_area.ipynb +363 -0
  212. tests/Q_mapping.yaml +38 -0
  213. tests/__init__.py +0 -0
  214. tests/conv.mapping.yaml +27 -0
  215. tests/conv.workload.yaml +13 -0
  216. tests/conv_sym.mapping.yaml +43 -0
  217. tests/copy.mapping.yaml +35 -0
  218. tests/copy.workload.yaml +15 -0
  219. tests/distribuffers/__init__.py +0 -0
  220. tests/distribuffers/multicast/test_cases.yaml +482 -0
  221. tests/distribuffers/spec/binding/valid_bindings.yaml +97 -0
  222. tests/distribuffers/spec/distributed.yaml +100 -0
  223. tests/distribuffers/spec/logical_arch.yaml +32 -0
  224. tests/distribuffers/spec/physical_arch.yaml +69 -0
  225. tests/distribuffers/test_binding.py +48 -0
  226. tests/frontend/__init__.py +0 -0
  227. tests/frontend/test_mapping_viz.py +52 -0
  228. tests/mapper/__init__.py +0 -0
  229. tests/mapper/configs/conv1d/conv1d.mapping.yaml +31 -0
  230. tests/mapper/configs/conv1d/conv1d.workload.yaml +11 -0
  231. tests/mapper/configs/two_conv1d/two_conv1d.expected.yaml +38 -0
  232. tests/mapper/configs/two_conv1d/two_conv1d.mapping.yaml +54 -0
  233. tests/mapper/configs/two_conv1d/two_conv1d.workload.yaml +19 -0
  234. tests/mapper/test_mapping_to_isl.py +90 -0
  235. tests/mapper/test_spatial_reuse_analysis.py +67 -0
  236. tests/mapper/test_temporal_reuse_analysis.py +56 -0
  237. tests/mapper/util.py +58 -0
  238. tests/matmul.mapping.yaml +29 -0
  239. tests/matmul.workload.yaml +12 -0
  240. tests/matmul_spatial.mapping.yaml +44 -0
  241. tests/mha.renames.yaml +65 -0
  242. tests/mha.workload.yaml +67 -0
  243. tests/mha.yaml +59 -0
  244. tests/mha_full.workload.yaml +67 -0
  245. tests/mobilenet.workload.yaml +35 -0
  246. tests/mobilenet_long.workload.yaml +64 -0
  247. tests/pmappingcache.py +24 -0
  248. tests/processing_stage.arch.yaml +40 -0
  249. tests/snowcat.arch.yaml +36 -0
  250. tests/test_ffm_join_pmappings.py +106 -0
  251. tests/test_ffm_make_pmappings.py +82 -0
  252. tests/test_ffm_make_tile_shapes.py +49 -0
  253. tests/test_mapper.py +100 -0
  254. tests/test_model.py +37 -0
  255. tests/test_plotting.py +72 -0
  256. tests/test_processing_stage.py +46 -0
  257. tests/test_symbolic_model.py +248 -0
  258. tests/test_workload.py +141 -0

accelforge/mapper/FFM/_pareto_df/pareto.py
@@ -0,0 +1,508 @@
+ import functools
+ from math import prod
+ import time
+
+ import pandas as pd
+
+ from paretoset import paretoset
+ from joblib import delayed
+ from sympy import factorint
+
+ from accelforge._accelerated_imports import np
+ from accelforge.util.parallel import parallel
+
+ from accelforge.mapper.FFM._pareto_df.df_convention import (
+     col_used_in_pareto,
+     is_fused_loop_col,
+     is_n_iterations_col,
+     is_objective_col,
+ )
+
+
+ def dominates(a: pd.Series, b: pd.Series) -> bool:
+     return all(a[i] <= b[i] for i in range(len(a)))
+
+
+ def check_dominance(df: pd.DataFrame, n_optimal: int):
+     # mask = np.zeros(len(df), dtype=bool)
+     # mask[:new_point] = True
+     mask = np.zeros(len(df) - n_optimal, dtype=bool)
+     for col in df.columns:
+         compare = df.iloc[n_optimal - 1][col]
+         mask = mask | (df[col].iloc[n_optimal:] < compare)
+     return np.concatenate([np.ones(n_optimal, dtype=bool), mask])
+
+
+ def quickpareto(df: pd.DataFrame) -> np.ndarray:
+     # Step 1: Sort by the column with the most unique values
+     # Step 2: Extract the first row. Add it to the Pareto set
+     # Step 3: Remove all dominated points
+     # Step 4: Repeat until no more points to add
+
+     # Step 1: Sort by the column with the most unique values
+     original_len = len(df)
+     col_to_sort = max(df.columns, key=lambda c: df[c].nunique())
+     df = df.sort_values(by=col_to_sort).drop(columns=[col_to_sort])
+
+     new_point = 0
+     while new_point < len(df):
+         mask = check_dominance(df, new_point + 1)
+         df = df[mask]
+         new_point += 1
+
+     # Turn the surviving index into a boolean mask over the original rows
+     mask = np.zeros(original_len, dtype=bool)
+     mask[df.index] = True
+     return mask
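
For intuition, here is a hypothetical usage sketch (the toy column names are invented; quickpareto assumes a default 0..n-1 row index, since it scatters the surviving index back into a mask):

    import numpy as np
    import pandas as pd

    points = pd.DataFrame({"energy": [1.0, 2.0, 3.0], "latency": [3.0, 2.0, 2.5]})
    mask = quickpareto(points)
    # (3.0, 2.5) is dominated by (2.0, 2.0): mask == [True, True, False]
    frontier = points[mask]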
+
+
+ def makepareto_quick2(mappings: pd.DataFrame, columns: list[str]) -> pd.DataFrame:
+     from fast_pareto import is_pareto_front
+
+     m2 = mappings[columns]
+     m2 = m2[is_pareto_front(m2.to_numpy())].drop_duplicates()
+     return mappings.loc[m2.index]
+
+
+ def makepareto_quick(mappings: pd.DataFrame, columns: list[str]) -> pd.DataFrame:
+     return mappings[quickpareto(mappings[columns])]
+
+
+ def paretofy_chunk(chunk, sense: list[str]):
+     return paretoset(chunk, sense=sense)
+
+
+ def makepareto_merge(
+     mappings: pd.DataFrame,
+     columns: list[str],
+     parallelize: bool = False,
+     split_by_cols: list[str] = (),
+ ) -> pd.DataFrame:
+     chunk_size = 10000
+     if len(mappings) <= 1:
+         return mappings
+
+     sense = ["min"] * len(columns) + ["diff"] * len(split_by_cols)
+
+     # Pareto-filter fixed-size chunks first, then filter the surviving rows
+     to_chunk = mappings[columns + list(split_by_cols)]
+     chunks = parallel(
+         [
+             delayed(paretofy_chunk)(chunk, sense)
+             for chunk in [
+                 to_chunk[i : i + chunk_size]
+                 for i in range(0, len(to_chunk), chunk_size)
+             ]
+         ],
+         n_jobs=1 if parallelize else None,
+     )
+     mappings = mappings[np.concatenate(chunks)]
+     return mappings[paretoset(mappings[columns + list(split_by_cols)], sense=sense)]
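
The chunked pass is sound because Pareto filtering distributes over unions: the front of the full set equals the front of the union of per-chunk fronts, which the final paretoset call computes. A minimal self-contained check of that identity (random minimize-only data; paretoset is the same PyPI package imported above):

    import numpy as np
    from paretoset import paretoset

    pts = np.random.rand(1000, 2)
    full_mask = paretoset(pts, sense=["min", "min"])
    chunk_mask = np.concatenate(
        [paretoset(c, sense=["min", "min"]) for c in (pts[:500], pts[500:])]
    )
    merged_mask = paretoset(pts[chunk_mask], sense=["min", "min"])
    assert np.array_equal(np.where(full_mask)[0], np.where(chunk_mask)[0][merged_mask])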
+
+
+ def makepareto_time_compare(mappings: pd.DataFrame, columns: list[str]) -> pd.DataFrame:
+     t0 = time.time()
+     pareto = makepareto_merge(mappings, columns)
+     t1 = time.time()
+     merge_time = t1 - t0
+     print(
+         f"Time to make pareto with merge: {merge_time: .2f}. Number of pareto points: {len(pareto)}"
+     )
+
+     t0 = time.time()
+     pareto2 = makepareto_quick2(mappings, columns)
+     t1 = time.time()
+     quick_time = t1 - t0
+     print(
+         f"Time to make pareto with quick: {quick_time: .2f}. Number of pareto points: {len(pareto2)}"
+     )
+
+     print(f"Quick/merge time ratio: {quick_time / merge_time: .2f}x")
+
+     if len(pareto) != len(pareto2):
+         print(f"mismatch: {len(pareto)} != {len(pareto2)}")
+         makepareto_quick2(mappings, columns)  # rerun the mismatching case for debugging
+
+     return pareto2
+
+
+
+ # 2c. Fully vectorized CuPy brute-force Pareto front
+ # (returns a numpy-style boolean mask for compatibility)
+ def pareto_front_cupy_vectorized(X):
+     # if len(X) > 1000:
+     #     return X[paretoset(X.get(), sense=["min"] * X.shape[1])]
+
+     # Broadcast X to (n, n, m) for an all-pairs comparison
+     A = X[:, None, :]  # shape (n, 1, m)
+     B = X[None, :, :]  # shape (1, n, m)
+     less_equal = (B <= A).all(axis=2)  # shape (n, n)
+     strictly_less = (B < A).any(axis=2)  # shape (n, n)
+     dominated = less_equal & strictly_less  # shape (n, n)
+     is_pareto = ~dominated.any(axis=1)
+     return is_pareto
+
+
+ # 2d. Recursive blockwise merge CuPy Pareto front with sorting by one objective
+ def pareto_front_cupy_blockwise_sorted_recursive(X, block_size=2000):
+     N = X.shape[0]
+     if N <= block_size:
+         # Base case: just compute the Pareto front directly
+         mask = pareto_front_cupy_vectorized(X)
+         return mask
+     # Split into two halves
+     mid = N // 2
+     a, b = X[:mid], X[mid:]
+     mask_a = pareto_front_cupy_blockwise_sorted_recursive(a, block_size)
+     mask_b = pareto_front_cupy_blockwise_sorted_recursive(b, block_size)
+     # Get Pareto-optimal points from both halves
+     pareto_points_a = a[mask_a]
+     pareto_points_b = b[mask_b]
+     merged_points = np.vstack([pareto_points_a, pareto_points_b])
+     # Compute the Pareto front of the merged set
+     merged_mask = pareto_front_cupy_vectorized(merged_points)
+     merged_indices = np.where(merged_mask)[0]
+     # Map merged_indices back to the original indices in X.
+     # First, get the indices in X for the merged points.
+     indices_a = np.where(mask_a)[0]
+     indices_b = np.where(mask_b)[0] + mid
+     all_indices = np.concatenate([indices_a, indices_b])
+     merged_indices_in_X = all_indices[merged_indices]
+     # Build the final mask for X
+     mask = np.zeros(N, dtype=bool)
+     mask[merged_indices_in_X] = True
+     return mask
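
A quick sanity check that the recursive merge matches the brute-force version; sizes are kept small because the all-pairs broadcast allocates O(n^2 * m) booleans (np here is whatever accelforge._accelerated_imports resolved to, NumPy or CuPy):

    X = np.random.rand(2000, 3)
    assert np.array_equal(
        pareto_front_cupy_vectorized(X),
        pareto_front_cupy_blockwise_sorted_recursive(X, block_size=500),
    )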
+
+
+ # def makepareto(
+ #     mappings: pd.DataFrame,
+ #     columns: list[str] = None,
+ #     parallelize: bool = False,
+ #     split_by_cols: list[str] = (),
+ # ) -> pd.DataFrame:
+ #     # return makepareto_time_compare(mappings)
+ #     if columns is None:
+ #         columns = [c for c in mappings.columns if col_used_in_pareto(c)]
+ #     if _accelerated_imports.ACCELERATED:
+ #         mask = pareto_front_cupy_blockwise_sorted_recursive(mappings[columns].to_cupy())
+ #         return mappings[mask]
+
+ TOLERANCE = 0.0
+
+
+ def logify(x: pd.Series) -> pd.Series:
+     if not 0 < TOLERANCE < 1:
+         assert (
+             TOLERANCE == 0
+         ), f"Tolerance must be between 0 and 1. Tolerance {TOLERANCE} is invalid."
+         return x
+
+     if x.min() <= 0:
+         return x
+
+     # Snap values onto a log-space grid of spacing TOLERANCE so that values
+     # within roughly a TOLERANCE fraction of each other compare as equal
+     logged = np.log(x)
+
+     return np.round(logged / TOLERANCE) * TOLERANCE
+
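
An illustrative computation of the log-space bucketing (TOLERANCE = 0.01 is a made-up setting; the module ships with 0.0, i.e. exact comparison):

    import numpy as np

    x = np.array([100.0, 100.5, 110.0])
    buckets = np.round(np.log(x) / 0.01) * 0.01
    # buckets == [4.61, 4.61, 4.70]: the first two values now compare as equal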
+
+
+ def makepareto(
+     mappings: pd.DataFrame,
+     columns: list[str] = None,
+     parallelize: bool = False,
+     split_by_cols: list[str] = (),
+ ) -> pd.DataFrame:
+     # return makepareto_time_compare(mappings)
+     if columns is None:
+         columns = [c for c in mappings.columns if col_used_in_pareto(c)]
+
+     # The number of iterations is derived from the tile shapes, so we don't
+     # need to use it; any row with the same tile shapes will have the same
+     # number of iterations.
+     split_by_cols = list(split_by_cols) + [
+         c
+         for c in mappings.columns
+         if is_fused_loop_col(c) and not is_n_iterations_col(c)
+     ]
+
+     goals = []
+     to_pareto = []
+     pareto_cols = []
+     for c in mappings.columns:
+         if mappings[c].nunique() <= 1:
+             continue
+
+         if c in columns and is_objective_col(c):  # or col_used_in_pareto(c)
+             to_pareto.append(logify(mappings[c]))
+             pareto_cols.append(c)
+             goals.append("min")
+         elif c in split_by_cols:
+             to_pareto.append(mappings[c])
+             pareto_cols.append(c)
+             goals.append("diff")
+         elif c in columns:
+             to_pareto.append(mappings[c])
+             pareto_cols.append(c)
+             goals.append("min")
+
+     if not to_pareto:
+         return mappings.iloc[0:1]
+
+     return mappings[paretoset(pd.concat(to_pareto, axis=1), sense=goals)]
+
+     # Unreachable debugging code:
+     # f = pd.concat(to_pareto, axis=1)
+     # x = list(f.groupby([c for c, d in zip(pareto_cols, goals) if d == "diff"]))
+     # print(x)
+
+
+ @functools.lru_cache(maxsize=10000)
+ def _factorint_cached(x: int):
+     return factorint(x)
+
+
+ def prime_factor_counts(arr: np.ndarray) -> np.ndarray:
+     arr = np.asarray(arr, dtype=int)
+     unique_vals = np.unique(arr)
+     factorizations = {x: _factorint_cached(x) for x in unique_vals}
+
+     # Gather all unique primes
+     all_primes = sorted({p for f in factorizations.values() for p in f})
+
+     # Build the result matrix: one row per value, one column per prime
+     result = np.zeros((len(arr), len(all_primes)), dtype=int)
+     prime_index = {p: j for j, p in enumerate(all_primes)}
+
+     for i, x in enumerate(arr):
+         for p, exp in factorizations[x].items():
+             result[i, prime_index[p]] = exp
+
+     return result
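
A worked example of the exponent matrix: for the values [12, 18] the unique primes are [2, 3], and each row holds that value's prime exponents.

    counts = prime_factor_counts(np.array([12, 18]))
    # 12 = 2**2 * 3**1  ->  [2, 1]
    # 18 = 2**1 * 3**2  ->  [1, 2]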
+
+
+ def paretoset_grouped_dirty(df: pd.DataFrame, sense: list[str]):
+     # return paretoset(df, sense=sense)
+
+     assert all(i == c for i, c in enumerate(df.columns))
+     assert len(sense) == len(df.columns)
+
+     from paretoset.algorithms_numba import paretoset_jit
+     from paretoset.algorithms_numba import BNL
+
+     # Flip "max" columns so every objective is a minimization
+     for c in df.columns:
+         if sense[c] == "max":
+             df[c] = -df[c]
+             sense[c] = "min"
+
+     GROUP_SIZE = 128
+
+     group_by = [c for c in df.columns if sense[c] == "diff"]
+     n_groups = prod(len(df[c].unique()) for c in group_by)
+
+     if len(df) / n_groups < GROUP_SIZE:
+         return paretoset(df, sense=sense)
+
+     # Greedily add the lowest-cardinality remaining columns to the grouping
+     # until the average group would fall below GROUP_SIZE rows
+     c2unique = {c: len(df[c].unique()) for c in df.columns if c not in group_by}
+     while c2unique:
+         col, n = min(c2unique.items(), key=lambda x: x[1])
+         c2unique.pop(col)
+         n_groups *= n
+         if len(df) / n_groups < GROUP_SIZE:
+             break
+         group_by.append(col)
+
+     n_diffs = sum(x == "diff" for x in sense)
+     if len(group_by) < 2 or len(group_by) == n_diffs:
+         return paretoset(df, sense=sense)
+
+     def _row_from_group(mins, group):
+         per_col_mins = group.min(axis=0)
+         per_col_maxs = group.max(axis=0)
+         good_row = group.iloc[
+             np.argmin((group ** (1 / len(group.columns))).prod(axis=1))
+         ]
+         return [mins, per_col_mins, per_col_maxs, good_row, group]
+
+     groups = list(df.groupby(group_by))
+     groups_by_diff = {}
+     keepcols = [c for c in df.columns if c not in group_by]
+     for x, group in groups:
+         diffs, mins = x[:n_diffs], x[n_diffs:]
+         group = group[keepcols]
+         groups_by_diff.setdefault(diffs, []).append(_row_from_group(mins, group))
+
+     # print(f'Grouped into {len(groups)} groups using {len(group_by)} columns')
+     # orig_size = len(df)
+     # n_groups = len(groups)
+     # n_cols = len(keepcols)
+     # new_size = sum(len(g2) for g in groups_by_diff.values() for _, _, _, g2 in g)
+     # print(f'Grouped into {n_groups} groups, {orig_size} -> {new_size} rows, {n_cols} columns. Remaining {len(keepcols)} columns')
+
+     for groups in groups_by_diff.values():
+         for i, (
+             mins_a,
+             per_col_mins_a,
+             per_col_maxs_a,
+             good_row_a,
+             group_a,
+         ) in enumerate(groups):
+             if group_a is None:
+                 continue
+
+             for j, (
+                 mins_b,
+                 per_col_mins_b,
+                 per_col_maxs_b,
+                 good_row_b,
+                 group_b,
+             ) in enumerate(groups):
+                 if group_b is None or i == j:
+                     continue
+
+                 if all(a <= b for a, b in zip(good_row_a, per_col_mins_b)):
+                     groups[j][-1] = None
+                     continue
+
+                 if all(a <= b for a, b in zip(good_row_a, good_row_b)):
+                     # The good row of a dominates the good row of b. It'll likely
+                     # dominate many rows of b!
+                     group_b = group_b[(group_b < good_row_a).any(axis=1)]
+                     if len(group_b) == 0:
+                         groups[j][-1] = None
+                         continue
+                     groups[j].clear()
+                     groups[j].extend(_row_from_group(mins_b, group_b))
+
+                 # # a can only dominate b if all of the min columns dominate
+                 # if not all(a <= b for a, b in zip(mins_a, mins_b)):
+                 #     continue
+
+                 # # Check if any b beats all a. If so, continue.
+                 # if any(a > b for a, b in zip(per_col_mins_a, per_col_maxs_b)):
+                 #     continue
+
+                 # # # Check if any a beats every b. If so, get rid of b.
+                 # # a_doms = all(a <= b for a, b in zip(per_col_maxs_a, per_col_mins_b))
+                 # # if a_doms:
+                 # #     groups[j][-1] = None
+                 # #     # print(f'Dropping dominated group {j}')
+                 # #     continue
+
+                 # row_a = group_a.iloc[np.random.randint(len(group_a))]
+                 # if all(a <= b for a, b in zip(row_w_min_first_obj_b, per_col_mins_b)):
+                 #     groups[j][-1] = None
+
+                 # Everything below just ended up making things slower
+
+                 # if any(a > b for a, b in zip(row_a, per_col_maxs_b)):
+                 #     continue
+
+                 # continue
+
+                 # # Grab a random a. Get rid of all b that are dominated by it.
+                 # a_lt_b_maxes = group_a.iloc[
+                 #     np.where(np.all(group_a <= per_col_maxs_b, axis=1))[0]
+                 # ]
+                 # if len(a_lt_b_maxes) == 0:
+                 #     continue
+
+                 # row_a = a_lt_b_maxes.iloc[np.random.randint(len(a_lt_b_maxes))]
+
+                 # b_idx = np.where(np.any(group_b < row_a, axis=1))[0]
+                 # if len(b_idx) == 0:
+                 #     groups[j][-1] = None
+                 # else:
+                 #     groups[j][-1] = group_b.iloc[b_idx]
+                 #     groups[j][1] = group_b.iloc[b_idx].min(axis=0)
+                 #     groups[j][2] = group_b.iloc[b_idx].max(axis=0)
+
+                 # # Now we're in a case where a may dominate b. Update b.
+                 # catted = pd.concat([group_a, group_b], axis=0)
+                 # mask = np.concatenate([
+                 #     np.zeros(len(group_a), dtype=bool),
+                 #     np.ones(len(group_b), dtype=bool)
+                 # ])
+                 # catted = catted[paretoset_jit(catted.to_numpy()) & mask]
+                 # groups[j][1] = catted.min(axis=0)
+                 # groups[j][2] = catted.max(axis=0)
+                 # groups[j][3] = catted
+
+     result = np.zeros(len(df), dtype=bool)
+     for rows in groups_by_diff.values():
+         for _, _, _, _, group in rows:
+             if group is not None:
+                 result[group[paretoset_jit(group.to_numpy())].index] = True
+
+     return result
+
+
+ def makepareto_numpy(
+     mappings: np.ndarray,
+     goals: list[str],
+     dirty: bool = False,
+ ) -> np.ndarray:
+     to_pareto = []
+     new_goals = []
+     assert len(goals) == mappings.shape[1]
+     for c in range(mappings.shape[1]):
+         if len(np.unique(mappings[:, c])) <= 1:
+             continue
+
+         goal = goals[c]
+         # if goal != "diff" and dirty and len(np.unique(mappings[:, c])) < np.log2(mappings.shape[0]):
+         #     # print(f"Changed {goal} to diff because there are {len(np.unique(mappings[:, c]))} unique values for {mappings.shape[0]} rows")
+         #     goal = "diff"
+
+         if goal in ["min", "max"]:
+             vals = logify(mappings[:, c].reshape((-1, 1)))
+             to_pareto.append(vals if goal == "min" else -vals)
+             new_goals.append("min")
+         elif goal == "diff":
+             to_pareto.append(mappings[:, c].reshape((-1, 1)))
+             new_goals.append("diff")
+         elif goal == "min_per_prime_factor":
+             if not dirty:
+                 # Paretoset tends to be faster with these as diffs. Tanner tried for a
+                 # long time to get min_per_prime_factor to be faster, but it
+                 # didn't work. What it would do is say that if one choice for an inner
+                 # loop has used up fewer of every prime factor than another choice, then
+                 # the former would give a superset of options for outer loops.
+                 # Intuitively, we could enable more pruning by doing this instead of
+                 # "diff", which is overconservative. Likewise, we could do "min" for
+                 # imperfect instead of "diff". However, this ultimately made things
+                 # slower because it didn't get much Pareto pruning, but caused many more
+                 # Pareto comparisons ("diff" partitioning into N partitions --> N^2
+                 # improvement). I hypothesize that the reason that it doesn't improve
+                 # pruning much is that when we've enumerated a loop but not the loop
+                 # above it, the given loop is almost always trading off tile shape for
+                 # accesses, leading to no point being dominated by another point.
+                 to_pareto.append(mappings[:, c].reshape((-1, 1)))
+                 new_goals.append("diff")
+             else:
+                 counts = prime_factor_counts(mappings[:, c])
+                 for i in range(counts.shape[1]):
+                     to_pareto.append(counts[:, i].reshape((-1, 1)))
+                     new_goals.append("min")
+         elif goal == "max_per_prime_factor":
+             if not dirty:
+                 # See the big comment above.
+                 to_pareto.append(mappings[:, c].reshape((-1, 1)))
+                 new_goals.append("diff")
+             else:
+                 counts = prime_factor_counts(mappings[:, c])
+                 for i in range(counts.shape[1]):
+                     to_pareto.append(counts[:, i].reshape((-1, 1)))
+                     new_goals.append("max")
+         else:
+             raise ValueError(f"Unknown goal: {goal}")
+
+     if not to_pareto:
+         n = np.zeros(mappings.shape[0], dtype=bool)
+         n[0] = True
+         return n
+
+     df = pd.DataFrame(np.concatenate(to_pareto, axis=1), columns=range(len(to_pareto)))
+
+     if dirty:
+         return paretoset_grouped_dirty(df, sense=new_goals)
+     return paretoset(df, sense=new_goals)
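
A minimal usage sketch of makepareto_numpy (the values are invented): with goals ["min", "diff"], rows are Pareto-compared only against rows sharing the same value in the "diff" column.

    arr = np.array([[1.0, 0.0],
                    [2.0, 0.0],
                    [1.5, 1.0]])
    mask = makepareto_numpy(arr, goals=["min", "diff"])
    # Row 1 is dominated by row 0 within diff-group 0.0; row 2 is alone in its
    # group, so mask == [True, False, True]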

accelforge/mapper/FFM/data.py
@@ -0,0 +1,61 @@
+ """
+ Results from mapping exploration.
+ """
+
+ import pandas as pd
+
+ from accelforge.mapper.FFM._pareto_df.df_convention import col2action
+ from accelforge.util._base_analysis_types import ActionKey, VerboseActionKey
+
+
+ class ResultDataFrame(pd.DataFrame):
+     @property
+     def _constructor(self):
+         return ResultDataFrame
+
+     @property
+     def _constructor_sliced(self):
+         return pd.Series
+
+     @property
+     def actions(self) -> "ResultDataFrame":
+         """Returns a ResultDataFrame with all action-related columns."""
+         action_columns = [col for col in self.columns if "action" in col]
+         return self[action_columns]
+
+     @property
+     def actions_df(self) -> "ActionDataFrame":
+         """Return an ActionDataFrame."""
+         df = self.actions
+         if any(isinstance(col2action(col), VerboseActionKey) for col in df.columns):
+             # Keep only the verbose action columns when any are present
+             df = df[
+                 [
+                     col
+                     for col in df.columns
+                     if isinstance(col2action(col), VerboseActionKey)
+                 ]
+             ]
+         return ActionDataFrame(df)
+
+     @property
+     def energy(self) -> "ResultDataFrame":
+         """Returns a ResultDataFrame with all energy-related columns."""
+         energy_columns = [col for col in self.columns if "energy" in col]
+         return self[energy_columns]
+
+
+ class ActionDataFrame(pd.DataFrame):
+     """
+     A hierarchical-column dataframe with action counts.
+     """
+
+     @property
+     def _constructor(self):
+         return ActionDataFrame
+
+     @property
+     def _constructor_sliced(self):
+         return pd.Series
+
+
+ class VerboseActionDataFrame(pd.DataFrame):
+     """
+     A hierarchical-column dataframe with verbose action counts.
+     """