asmjit 0.2.0 → 0.2.1

Files changed (201)
  1. checksums.yaml +4 -4
  2. data/Gemfile.lock +1 -1
  3. data/asmjit.gemspec +1 -1
  4. data/ext/asmjit/asmjit/.editorconfig +10 -0
  5. data/ext/asmjit/asmjit/.github/FUNDING.yml +1 -0
  6. data/ext/asmjit/asmjit/.github/workflows/build-config.json +47 -0
  7. data/ext/asmjit/asmjit/.github/workflows/build.yml +156 -0
  8. data/ext/asmjit/asmjit/.gitignore +6 -0
  9. data/ext/asmjit/asmjit/CMakeLists.txt +611 -0
  10. data/ext/asmjit/asmjit/LICENSE.md +17 -0
  11. data/ext/asmjit/asmjit/README.md +69 -0
  12. data/ext/asmjit/asmjit/src/asmjit/a64.h +62 -0
  13. data/ext/asmjit/asmjit/src/asmjit/arm/a64archtraits_p.h +81 -0
  14. data/ext/asmjit/asmjit/src/asmjit/arm/a64assembler.cpp +5115 -0
  15. data/ext/asmjit/asmjit/src/asmjit/arm/a64assembler.h +72 -0
  16. data/ext/asmjit/asmjit/src/asmjit/arm/a64builder.cpp +51 -0
  17. data/ext/asmjit/asmjit/src/asmjit/arm/a64builder.h +57 -0
  18. data/ext/asmjit/asmjit/src/asmjit/arm/a64compiler.cpp +60 -0
  19. data/ext/asmjit/asmjit/src/asmjit/arm/a64compiler.h +247 -0
  20. data/ext/asmjit/asmjit/src/asmjit/arm/a64emithelper.cpp +464 -0
  21. data/ext/asmjit/asmjit/src/asmjit/arm/a64emithelper_p.h +50 -0
  22. data/ext/asmjit/asmjit/src/asmjit/arm/a64emitter.h +1228 -0
  23. data/ext/asmjit/asmjit/src/asmjit/arm/a64formatter.cpp +298 -0
  24. data/ext/asmjit/asmjit/src/asmjit/arm/a64formatter_p.h +59 -0
  25. data/ext/asmjit/asmjit/src/asmjit/arm/a64func.cpp +189 -0
  26. data/ext/asmjit/asmjit/src/asmjit/arm/a64func_p.h +33 -0
  27. data/ext/asmjit/asmjit/src/asmjit/arm/a64globals.h +1894 -0
  28. data/ext/asmjit/asmjit/src/asmjit/arm/a64instapi.cpp +278 -0
  29. data/ext/asmjit/asmjit/src/asmjit/arm/a64instapi_p.h +41 -0
  30. data/ext/asmjit/asmjit/src/asmjit/arm/a64instdb.cpp +1957 -0
  31. data/ext/asmjit/asmjit/src/asmjit/arm/a64instdb.h +74 -0
  32. data/ext/asmjit/asmjit/src/asmjit/arm/a64instdb_p.h +876 -0
  33. data/ext/asmjit/asmjit/src/asmjit/arm/a64operand.cpp +85 -0
  34. data/ext/asmjit/asmjit/src/asmjit/arm/a64operand.h +312 -0
  35. data/ext/asmjit/asmjit/src/asmjit/arm/a64rapass.cpp +852 -0
  36. data/ext/asmjit/asmjit/src/asmjit/arm/a64rapass_p.h +105 -0
  37. data/ext/asmjit/asmjit/src/asmjit/arm/a64utils.h +179 -0
  38. data/ext/asmjit/asmjit/src/asmjit/arm/armformatter.cpp +143 -0
  39. data/ext/asmjit/asmjit/src/asmjit/arm/armformatter_p.h +44 -0
  40. data/ext/asmjit/asmjit/src/asmjit/arm/armglobals.h +21 -0
  41. data/ext/asmjit/asmjit/src/asmjit/arm/armoperand.h +621 -0
  42. data/ext/asmjit/asmjit/src/asmjit/arm.h +62 -0
  43. data/ext/asmjit/asmjit/src/asmjit/asmjit-scope-begin.h +17 -0
  44. data/ext/asmjit/asmjit/src/asmjit/asmjit-scope-end.h +9 -0
  45. data/ext/asmjit/asmjit/src/asmjit/asmjit.h +33 -0
  46. data/ext/asmjit/asmjit/src/asmjit/core/api-build_p.h +55 -0
  47. data/ext/asmjit/asmjit/src/asmjit/core/api-config.h +613 -0
  48. data/ext/asmjit/asmjit/src/asmjit/core/archcommons.h +229 -0
  49. data/ext/asmjit/asmjit/src/asmjit/core/archtraits.cpp +160 -0
  50. data/ext/asmjit/asmjit/src/asmjit/core/archtraits.h +290 -0
  51. data/ext/asmjit/asmjit/src/asmjit/core/assembler.cpp +406 -0
  52. data/ext/asmjit/asmjit/src/asmjit/core/assembler.h +129 -0
  53. data/ext/asmjit/asmjit/src/asmjit/core/builder.cpp +889 -0
  54. data/ext/asmjit/asmjit/src/asmjit/core/builder.h +1391 -0
  55. data/ext/asmjit/asmjit/src/asmjit/core/codebuffer.h +113 -0
  56. data/ext/asmjit/asmjit/src/asmjit/core/codeholder.cpp +1149 -0
  57. data/ext/asmjit/asmjit/src/asmjit/core/codeholder.h +1035 -0
  58. data/ext/asmjit/asmjit/src/asmjit/core/codewriter.cpp +175 -0
  59. data/ext/asmjit/asmjit/src/asmjit/core/codewriter_p.h +179 -0
  60. data/ext/asmjit/asmjit/src/asmjit/core/compiler.cpp +582 -0
  61. data/ext/asmjit/asmjit/src/asmjit/core/compiler.h +737 -0
  62. data/ext/asmjit/asmjit/src/asmjit/core/compilerdefs.h +173 -0
  63. data/ext/asmjit/asmjit/src/asmjit/core/constpool.cpp +363 -0
  64. data/ext/asmjit/asmjit/src/asmjit/core/constpool.h +250 -0
  65. data/ext/asmjit/asmjit/src/asmjit/core/cpuinfo.cpp +1162 -0
  66. data/ext/asmjit/asmjit/src/asmjit/core/cpuinfo.h +813 -0
  67. data/ext/asmjit/asmjit/src/asmjit/core/emithelper.cpp +323 -0
  68. data/ext/asmjit/asmjit/src/asmjit/core/emithelper_p.h +58 -0
  69. data/ext/asmjit/asmjit/src/asmjit/core/emitter.cpp +333 -0
  70. data/ext/asmjit/asmjit/src/asmjit/core/emitter.h +741 -0
  71. data/ext/asmjit/asmjit/src/asmjit/core/emitterutils.cpp +129 -0
  72. data/ext/asmjit/asmjit/src/asmjit/core/emitterutils_p.h +89 -0
  73. data/ext/asmjit/asmjit/src/asmjit/core/environment.cpp +46 -0
  74. data/ext/asmjit/asmjit/src/asmjit/core/environment.h +508 -0
  75. data/ext/asmjit/asmjit/src/asmjit/core/errorhandler.cpp +14 -0
  76. data/ext/asmjit/asmjit/src/asmjit/core/errorhandler.h +228 -0
  77. data/ext/asmjit/asmjit/src/asmjit/core/formatter.cpp +584 -0
  78. data/ext/asmjit/asmjit/src/asmjit/core/formatter.h +247 -0
  79. data/ext/asmjit/asmjit/src/asmjit/core/formatter_p.h +34 -0
  80. data/ext/asmjit/asmjit/src/asmjit/core/func.cpp +286 -0
  81. data/ext/asmjit/asmjit/src/asmjit/core/func.h +1445 -0
  82. data/ext/asmjit/asmjit/src/asmjit/core/funcargscontext.cpp +293 -0
  83. data/ext/asmjit/asmjit/src/asmjit/core/funcargscontext_p.h +199 -0
  84. data/ext/asmjit/asmjit/src/asmjit/core/globals.cpp +133 -0
  85. data/ext/asmjit/asmjit/src/asmjit/core/globals.h +393 -0
  86. data/ext/asmjit/asmjit/src/asmjit/core/inst.cpp +113 -0
  87. data/ext/asmjit/asmjit/src/asmjit/core/inst.h +772 -0
  88. data/ext/asmjit/asmjit/src/asmjit/core/jitallocator.cpp +1242 -0
  89. data/ext/asmjit/asmjit/src/asmjit/core/jitallocator.h +261 -0
  90. data/ext/asmjit/asmjit/src/asmjit/core/jitruntime.cpp +80 -0
  91. data/ext/asmjit/asmjit/src/asmjit/core/jitruntime.h +89 -0
  92. data/ext/asmjit/asmjit/src/asmjit/core/logger.cpp +69 -0
  93. data/ext/asmjit/asmjit/src/asmjit/core/logger.h +198 -0
  94. data/ext/asmjit/asmjit/src/asmjit/core/misc_p.h +33 -0
  95. data/ext/asmjit/asmjit/src/asmjit/core/operand.cpp +132 -0
  96. data/ext/asmjit/asmjit/src/asmjit/core/operand.h +1611 -0
  97. data/ext/asmjit/asmjit/src/asmjit/core/osutils.cpp +84 -0
  98. data/ext/asmjit/asmjit/src/asmjit/core/osutils.h +61 -0
  99. data/ext/asmjit/asmjit/src/asmjit/core/osutils_p.h +68 -0
  100. data/ext/asmjit/asmjit/src/asmjit/core/raassignment_p.h +418 -0
  101. data/ext/asmjit/asmjit/src/asmjit/core/rabuilders_p.h +612 -0
  102. data/ext/asmjit/asmjit/src/asmjit/core/radefs_p.h +1204 -0
  103. data/ext/asmjit/asmjit/src/asmjit/core/ralocal.cpp +1166 -0
  104. data/ext/asmjit/asmjit/src/asmjit/core/ralocal_p.h +254 -0
  105. data/ext/asmjit/asmjit/src/asmjit/core/rapass.cpp +1969 -0
  106. data/ext/asmjit/asmjit/src/asmjit/core/rapass_p.h +1183 -0
  107. data/ext/asmjit/asmjit/src/asmjit/core/rastack.cpp +184 -0
  108. data/ext/asmjit/asmjit/src/asmjit/core/rastack_p.h +171 -0
  109. data/ext/asmjit/asmjit/src/asmjit/core/string.cpp +559 -0
  110. data/ext/asmjit/asmjit/src/asmjit/core/string.h +372 -0
  111. data/ext/asmjit/asmjit/src/asmjit/core/support.cpp +494 -0
  112. data/ext/asmjit/asmjit/src/asmjit/core/support.h +1773 -0
  113. data/ext/asmjit/asmjit/src/asmjit/core/target.cpp +14 -0
  114. data/ext/asmjit/asmjit/src/asmjit/core/target.h +53 -0
  115. data/ext/asmjit/asmjit/src/asmjit/core/type.cpp +74 -0
  116. data/ext/asmjit/asmjit/src/asmjit/core/type.h +419 -0
  117. data/ext/asmjit/asmjit/src/asmjit/core/virtmem.cpp +722 -0
  118. data/ext/asmjit/asmjit/src/asmjit/core/virtmem.h +242 -0
  119. data/ext/asmjit/asmjit/src/asmjit/core/zone.cpp +353 -0
  120. data/ext/asmjit/asmjit/src/asmjit/core/zone.h +615 -0
  121. data/ext/asmjit/asmjit/src/asmjit/core/zonehash.cpp +309 -0
  122. data/ext/asmjit/asmjit/src/asmjit/core/zonehash.h +186 -0
  123. data/ext/asmjit/asmjit/src/asmjit/core/zonelist.cpp +163 -0
  124. data/ext/asmjit/asmjit/src/asmjit/core/zonelist.h +209 -0
  125. data/ext/asmjit/asmjit/src/asmjit/core/zonestack.cpp +176 -0
  126. data/ext/asmjit/asmjit/src/asmjit/core/zonestack.h +239 -0
  127. data/ext/asmjit/asmjit/src/asmjit/core/zonestring.h +120 -0
  128. data/ext/asmjit/asmjit/src/asmjit/core/zonetree.cpp +99 -0
  129. data/ext/asmjit/asmjit/src/asmjit/core/zonetree.h +380 -0
  130. data/ext/asmjit/asmjit/src/asmjit/core/zonevector.cpp +356 -0
  131. data/ext/asmjit/asmjit/src/asmjit/core/zonevector.h +690 -0
  132. data/ext/asmjit/asmjit/src/asmjit/core.h +1861 -0
  133. data/ext/asmjit/asmjit/src/asmjit/x86/x86archtraits_p.h +148 -0
  134. data/ext/asmjit/asmjit/src/asmjit/x86/x86assembler.cpp +5110 -0
  135. data/ext/asmjit/asmjit/src/asmjit/x86/x86assembler.h +685 -0
  136. data/ext/asmjit/asmjit/src/asmjit/x86/x86builder.cpp +52 -0
  137. data/ext/asmjit/asmjit/src/asmjit/x86/x86builder.h +351 -0
  138. data/ext/asmjit/asmjit/src/asmjit/x86/x86compiler.cpp +61 -0
  139. data/ext/asmjit/asmjit/src/asmjit/x86/x86compiler.h +721 -0
  140. data/ext/asmjit/asmjit/src/asmjit/x86/x86emithelper.cpp +619 -0
  141. data/ext/asmjit/asmjit/src/asmjit/x86/x86emithelper_p.h +60 -0
  142. data/ext/asmjit/asmjit/src/asmjit/x86/x86emitter.h +4315 -0
  143. data/ext/asmjit/asmjit/src/asmjit/x86/x86formatter.cpp +944 -0
  144. data/ext/asmjit/asmjit/src/asmjit/x86/x86formatter_p.h +58 -0
  145. data/ext/asmjit/asmjit/src/asmjit/x86/x86func.cpp +503 -0
  146. data/ext/asmjit/asmjit/src/asmjit/x86/x86func_p.h +33 -0
  147. data/ext/asmjit/asmjit/src/asmjit/x86/x86globals.h +2169 -0
  148. data/ext/asmjit/asmjit/src/asmjit/x86/x86instapi.cpp +1732 -0
  149. data/ext/asmjit/asmjit/src/asmjit/x86/x86instapi_p.h +41 -0
  150. data/ext/asmjit/asmjit/src/asmjit/x86/x86instdb.cpp +4427 -0
  151. data/ext/asmjit/asmjit/src/asmjit/x86/x86instdb.h +563 -0
  152. data/ext/asmjit/asmjit/src/asmjit/x86/x86instdb_p.h +311 -0
  153. data/ext/asmjit/asmjit/src/asmjit/x86/x86opcode_p.h +436 -0
  154. data/ext/asmjit/asmjit/src/asmjit/x86/x86operand.cpp +231 -0
  155. data/ext/asmjit/asmjit/src/asmjit/x86/x86operand.h +1085 -0
  156. data/ext/asmjit/asmjit/src/asmjit/x86/x86rapass.cpp +1509 -0
  157. data/ext/asmjit/asmjit/src/asmjit/x86/x86rapass_p.h +94 -0
  158. data/ext/asmjit/asmjit/src/asmjit/x86.h +93 -0
  159. data/ext/asmjit/asmjit/src/asmjit.natvis +245 -0
  160. data/ext/asmjit/asmjit/test/asmjit_test_assembler.cpp +84 -0
  161. data/ext/asmjit/asmjit/test/asmjit_test_assembler.h +85 -0
  162. data/ext/asmjit/asmjit/test/asmjit_test_assembler_a64.cpp +4006 -0
  163. data/ext/asmjit/asmjit/test/asmjit_test_assembler_x64.cpp +17833 -0
  164. data/ext/asmjit/asmjit/test/asmjit_test_assembler_x86.cpp +8300 -0
  165. data/ext/asmjit/asmjit/test/asmjit_test_compiler.cpp +253 -0
  166. data/ext/asmjit/asmjit/test/asmjit_test_compiler.h +73 -0
  167. data/ext/asmjit/asmjit/test/asmjit_test_compiler_a64.cpp +690 -0
  168. data/ext/asmjit/asmjit/test/asmjit_test_compiler_x86.cpp +4317 -0
  169. data/ext/asmjit/asmjit/test/asmjit_test_emitters.cpp +197 -0
  170. data/ext/asmjit/asmjit/test/asmjit_test_instinfo.cpp +181 -0
  171. data/ext/asmjit/asmjit/test/asmjit_test_misc.h +257 -0
  172. data/ext/asmjit/asmjit/test/asmjit_test_perf.cpp +62 -0
  173. data/ext/asmjit/asmjit/test/asmjit_test_perf.h +61 -0
  174. data/ext/asmjit/asmjit/test/asmjit_test_perf_a64.cpp +699 -0
  175. data/ext/asmjit/asmjit/test/asmjit_test_perf_x86.cpp +5032 -0
  176. data/ext/asmjit/asmjit/test/asmjit_test_unit.cpp +172 -0
  177. data/ext/asmjit/asmjit/test/asmjit_test_x86_sections.cpp +172 -0
  178. data/ext/asmjit/asmjit/test/asmjitutils.h +38 -0
  179. data/ext/asmjit/asmjit/test/broken.cpp +312 -0
  180. data/ext/asmjit/asmjit/test/broken.h +148 -0
  181. data/ext/asmjit/asmjit/test/cmdline.h +61 -0
  182. data/ext/asmjit/asmjit/test/performancetimer.h +41 -0
  183. data/ext/asmjit/asmjit/tools/configure-makefiles.sh +13 -0
  184. data/ext/asmjit/asmjit/tools/configure-ninja.sh +13 -0
  185. data/ext/asmjit/asmjit/tools/configure-sanitizers.sh +13 -0
  186. data/ext/asmjit/asmjit/tools/configure-vs2019-x64.bat +2 -0
  187. data/ext/asmjit/asmjit/tools/configure-vs2019-x86.bat +2 -0
  188. data/ext/asmjit/asmjit/tools/configure-vs2022-x64.bat +2 -0
  189. data/ext/asmjit/asmjit/tools/configure-vs2022-x86.bat +2 -0
  190. data/ext/asmjit/asmjit/tools/configure-xcode.sh +8 -0
  191. data/ext/asmjit/asmjit/tools/enumgen.js +417 -0
  192. data/ext/asmjit/asmjit/tools/enumgen.sh +3 -0
  193. data/ext/asmjit/asmjit/tools/tablegen-arm.js +365 -0
  194. data/ext/asmjit/asmjit/tools/tablegen-arm.sh +3 -0
  195. data/ext/asmjit/asmjit/tools/tablegen-x86.js +2638 -0
  196. data/ext/asmjit/asmjit/tools/tablegen-x86.sh +3 -0
  197. data/ext/asmjit/asmjit/tools/tablegen.js +947 -0
  198. data/ext/asmjit/asmjit/tools/tablegen.sh +4 -0
  199. data/ext/asmjit/asmjit.cc +18 -0
  200. data/lib/asmjit/version.rb +1 -1
  201. metadata +197 -2
data/ext/asmjit/asmjit/src/asmjit/core/ralocal.cpp
@@ -0,0 +1,1166 @@
1
+ // This file is part of AsmJit project <https://asmjit.com>
2
+ //
3
+ // See asmjit.h or LICENSE.md for license and copyright information
4
+ // SPDX-License-Identifier: Zlib
5
+
6
+ #include "../core/api-build_p.h"
7
+ #ifndef ASMJIT_NO_COMPILER
8
+
9
+ #include "../core/ralocal_p.h"
10
+ #include "../core/support.h"
11
+
12
+ ASMJIT_BEGIN_NAMESPACE
13
+
14
+ // RALocalAllocator - Utilities
15
+ // ============================
16
+
17
+ static ASMJIT_FORCE_INLINE RATiedReg* RALocal_findTiedRegByWorkId(RATiedReg* tiedRegs, size_t count, uint32_t workId) noexcept {
18
+ for (size_t i = 0; i < count; i++)
19
+ if (tiedRegs[i].workId() == workId)
20
+ return &tiedRegs[i];
21
+ return nullptr;
22
+ }
23
+
24
+ // RALocalAllocator - Init & Reset
25
+ // ===============================
26
+
27
+ Error RALocalAllocator::init() noexcept {
28
+ PhysToWorkMap* physToWorkMap;
29
+ WorkToPhysMap* workToPhysMap;
30
+
31
+ physToWorkMap = _pass->newPhysToWorkMap();
32
+ workToPhysMap = _pass->newWorkToPhysMap();
33
+ if (!physToWorkMap || !workToPhysMap)
34
+ return DebugUtils::errored(kErrorOutOfMemory);
35
+
36
+ _curAssignment.initLayout(_pass->_physRegCount, _pass->workRegs());
37
+ _curAssignment.initMaps(physToWorkMap, workToPhysMap);
38
+
39
+ physToWorkMap = _pass->newPhysToWorkMap();
40
+ workToPhysMap = _pass->newWorkToPhysMap();
41
+ _tmpWorkToPhysMap = _pass->newWorkToPhysMap();
42
+
43
+ if (!physToWorkMap || !workToPhysMap || !_tmpWorkToPhysMap)
44
+ return DebugUtils::errored(kErrorOutOfMemory);
45
+
46
+ _tmpAssignment.initLayout(_pass->_physRegCount, _pass->workRegs());
47
+ _tmpAssignment.initMaps(physToWorkMap, workToPhysMap);
48
+
49
+ return kErrorOk;
50
+ }
51
+
52
+ // RALocalAllocator - Assignment
53
+ // =============================
54
+
55
+ Error RALocalAllocator::makeInitialAssignment() noexcept {
56
+ FuncNode* func = _pass->func();
57
+ RABlock* entry = _pass->entryBlock();
58
+
59
+ ZoneBitVector& liveIn = entry->liveIn();
60
+ uint32_t argCount = func->argCount();
61
+ uint32_t numIter = 1;
62
+
63
+ for (uint32_t iter = 0; iter < numIter; iter++) {
64
+ for (uint32_t argIndex = 0; argIndex < argCount; argIndex++) {
65
+ for (uint32_t valueIndex = 0; valueIndex < Globals::kMaxValuePack; valueIndex++) {
66
+ // Unassigned argument.
67
+ const RegOnly& regArg = func->argPack(argIndex)[valueIndex];
68
+ if (!regArg.isReg() || !_cc->isVirtIdValid(regArg.id()))
69
+ continue;
70
+
71
+ VirtReg* virtReg = _cc->virtRegById(regArg.id());
72
+
73
+ // Unreferenced argument.
74
+ RAWorkReg* workReg = virtReg->workReg();
75
+ if (!workReg)
76
+ continue;
77
+
78
+ // Overwritten argument.
79
+ uint32_t workId = workReg->workId();
80
+ if (!liveIn.bitAt(workId))
81
+ continue;
82
+
83
+ RegGroup group = workReg->group();
84
+ if (_curAssignment.workToPhysId(group, workId) != RAAssignment::kPhysNone)
85
+ continue;
86
+
87
+ RegMask allocableRegs = _availableRegs[group] & ~_curAssignment.assigned(group);
88
+ if (iter == 0) {
89
+ // First iteration: Try to allocate to home RegId.
90
+ if (workReg->hasHomeRegId()) {
91
+ uint32_t physId = workReg->homeRegId();
92
+ if (Support::bitTest(allocableRegs, physId)) {
93
+ _curAssignment.assign(group, workId, physId, true);
94
+ _pass->_argsAssignment.assignRegInPack(argIndex, valueIndex, workReg->type(), physId, workReg->typeId());
95
+ continue;
96
+ }
97
+ }
98
+
99
+ numIter = 2;
100
+ }
101
+ else {
102
+ // Second iteration: Pick any other register if there is an unassigned one, or assign to stack.
103
+ if (allocableRegs) {
104
+ uint32_t physId = Support::ctz(allocableRegs);
105
+ _curAssignment.assign(group, workId, physId, true);
106
+ _pass->_argsAssignment.assignRegInPack(argIndex, valueIndex, workReg->type(), physId, workReg->typeId());
107
+ }
108
+ else {
109
+ // This register will definitely need a stack slot, so create it now and also assign `argIndex`
110
+ // to it. We will patch `_argsAssignment` later after RAStackAllocator finishes.
111
+ RAStackSlot* slot = _pass->getOrCreateStackSlot(workReg);
112
+ if (ASMJIT_UNLIKELY(!slot))
113
+ return DebugUtils::errored(kErrorOutOfMemory);
114
+
115
+ // This means STACK_ARG may be moved to STACK.
116
+ workReg->addFlags(RAWorkRegFlags::kStackArgToStack);
117
+ _pass->_numStackArgsToStackSlots++;
118
+ }
119
+ }
120
+ }
121
+ }
122
+ }
123
+
124
+ return kErrorOk;
125
+ }
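The function above makes the initial argument assignment in two passes: the first pass honors each argument's home register id, and the second pass takes any remaining free register or falls back to a stack slot. A minimal standalone sketch of that preference order, collapsing the two passes into one per-argument decision and using a plain 32-bit mask and hypothetical names (pickArgRegister, kNoReg) instead of AsmJit's real types:

    #include <bit>
    #include <cstdint>

    constexpr uint32_t kNoReg = 0xFFFFFFFFu;  // Hypothetical "spill to stack" marker.

    // Returns the physical register id to use for an argument, or kNoReg to
    // request a stack slot. `freeRegs` is a bitmask of unassigned registers.
    uint32_t pickArgRegister(uint32_t freeRegs, bool hasHomeId, uint32_t homeId) {
      if (hasHomeId && (freeRegs & (1u << homeId)))
        return homeId;                               // Pass 1: prefer the home id.
      if (freeRegs)
        return uint32_t(std::countr_zero(freeRegs)); // Pass 2: lowest free register.
      return kNoReg;                                 // Nothing left: use the stack.
    }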
126
+
127
+ Error RALocalAllocator::replaceAssignment(const PhysToWorkMap* physToWorkMap) noexcept {
128
+ _curAssignment.copyFrom(physToWorkMap);
129
+ return kErrorOk;
130
+ }
131
+
132
+ Error RALocalAllocator::switchToAssignment(PhysToWorkMap* dstPhysToWorkMap, const ZoneBitVector& liveIn, bool dstReadOnly, bool tryMode) noexcept {
133
+ RAAssignment dst;
134
+ RAAssignment& cur = _curAssignment;
135
+
136
+ dst.initLayout(_pass->_physRegCount, _pass->workRegs());
137
+ dst.initMaps(dstPhysToWorkMap, _tmpWorkToPhysMap);
138
+ dst.assignWorkIdsFromPhysIds();
139
+
140
+ if (tryMode)
141
+ return kErrorOk;
142
+
143
+ for (RegGroup group : RegGroupVirtValues{}) {
144
+ // STEP 1
145
+ // ------
146
+ //
147
+ // - KILL all registers that are not live at `dst`,
148
+ // - SPILL all registers that are not assigned at `dst`.
149
+
150
+ if (!tryMode) {
151
+ Support::BitWordIterator<RegMask> it(cur.assigned(group));
152
+ while (it.hasNext()) {
153
+ uint32_t physId = it.next();
154
+ uint32_t workId = cur.physToWorkId(group, physId);
155
+
156
+ // Must be true as we iterate over assigned registers.
157
+ ASMJIT_ASSERT(workId != RAAssignment::kWorkNone);
158
+
159
+ // KILL if it's not live on entry.
160
+ if (!liveIn.bitAt(workId)) {
161
+ onKillReg(group, workId, physId);
162
+ continue;
163
+ }
164
+
165
+ // SPILL if it's not assigned on entry.
166
+ uint32_t altId = dst.workToPhysId(group, workId);
167
+ if (altId == RAAssignment::kPhysNone) {
168
+ ASMJIT_PROPAGATE(onSpillReg(group, workId, physId));
169
+ }
170
+ }
171
+ }
172
+
173
+ // STEP 2
174
+ // ------
175
+ //
176
+ // - MOVE and SWAP registers from their current assignments into their DST assignments.
177
+ // - Build `willLoadRegs` mask of registers scheduled for `onLoadReg()`.
178
+
179
+ // Current run-id (1 means more aggressive decisions).
180
+ int32_t runId = -1;
181
+ // Remaining registers scheduled for `onLoadReg()`.
182
+ RegMask willLoadRegs = 0;
183
+ // Remaining registers to be allocated in this loop.
184
+ RegMask affectedRegs = dst.assigned(group);
185
+
186
+ while (affectedRegs) {
187
+ if (++runId == 2) {
188
+ if (!tryMode)
189
+ return DebugUtils::errored(kErrorInvalidState);
190
+
191
+ // Stop in `tryMode` if we haven't done anything in the past two rounds.
192
+ break;
193
+ }
194
+
195
+ Support::BitWordIterator<RegMask> it(affectedRegs);
196
+ while (it.hasNext()) {
197
+ uint32_t physId = it.next();
198
+ RegMask physMask = Support::bitMask<RegMask>(physId);
199
+
200
+ uint32_t curWorkId = cur.physToWorkId(group, physId);
201
+ uint32_t dstWorkId = dst.physToWorkId(group, physId);
202
+
203
+ // The register must have assigned `dstWorkId` as we only iterate over assigned regs.
204
+ ASMJIT_ASSERT(dstWorkId != RAAssignment::kWorkNone);
205
+
206
+ if (curWorkId != RAAssignment::kWorkNone) {
207
+ // Both assigned.
208
+ if (curWorkId != dstWorkId) {
209
+ // Wait a bit if this is the first run, we may avoid this if `curWorkId` moves out.
210
+ if (runId <= 0)
211
+ continue;
212
+
213
+ uint32_t altPhysId = cur.workToPhysId(group, dstWorkId);
214
+ if (altPhysId == RAAssignment::kPhysNone)
215
+ continue;
216
+
217
+ // Reset as we will do some changes to the current assignment.
218
+ runId = -1;
219
+
220
+ if (_archTraits->hasInstRegSwap(group)) {
221
+ ASMJIT_PROPAGATE(onSwapReg(group, curWorkId, physId, dstWorkId, altPhysId));
222
+ }
223
+ else {
224
+ // SPILL the reg if it's not dirty in DST, otherwise try to MOVE.
225
+ if (!cur.isPhysDirty(group, physId)) {
226
+ ASMJIT_PROPAGATE(onKillReg(group, curWorkId, physId));
227
+ }
228
+ else {
229
+ RegMask allocableRegs = _pass->_availableRegs[group] & ~cur.assigned(group);
230
+
231
+ // If possible don't conflict with assigned regs at DST.
232
+ if (allocableRegs & ~dst.assigned(group))
233
+ allocableRegs &= ~dst.assigned(group);
234
+
235
+ if (allocableRegs) {
236
+ // MOVE is possible, thus preferred.
237
+ uint32_t tmpPhysId = Support::ctz(allocableRegs);
238
+
239
+ ASMJIT_PROPAGATE(onMoveReg(group, curWorkId, tmpPhysId, physId));
240
+ _pass->_clobberedRegs[group] |= Support::bitMask(tmpPhysId);
241
+ }
242
+ else {
243
+ // MOVE is impossible, must SPILL.
244
+ ASMJIT_PROPAGATE(onSpillReg(group, curWorkId, physId));
245
+ }
246
+ }
247
+
248
+ goto Cleared;
249
+ }
250
+ }
251
+ }
252
+ else {
253
+ Cleared:
254
+ // DST assigned, CUR unassigned.
255
+ uint32_t altPhysId = cur.workToPhysId(group, dstWorkId);
256
+ if (altPhysId == RAAssignment::kPhysNone) {
257
+ if (liveIn.bitAt(dstWorkId))
258
+ willLoadRegs |= physMask; // Scheduled for `onLoadReg()`.
259
+ affectedRegs &= ~physMask; // Unaffected from now.
260
+ continue;
261
+ }
262
+ ASMJIT_PROPAGATE(onMoveReg(group, dstWorkId, physId, altPhysId));
263
+ }
264
+
265
+ // Both DST and CUR assigned to the same reg or CUR just moved to DST.
266
+ if ((dst.dirty(group) & physMask) != (cur.dirty(group) & physMask)) {
267
+ if ((dst.dirty(group) & physMask) == 0) {
268
+ // CUR dirty, DST not dirty (the assert is just to visualize the condition).
269
+ ASMJIT_ASSERT(!dst.isPhysDirty(group, physId) && cur.isPhysDirty(group, physId));
270
+
271
+ // If `dstReadOnly` is true it means that the block was already processed and we cannot change from
272
+ // CLEAN to DIRTY. In that case the register has to be saved as it cannot enter the block DIRTY.
273
+ if (dstReadOnly)
274
+ ASMJIT_PROPAGATE(onSaveReg(group, dstWorkId, physId));
275
+ else
276
+ dst.makeDirty(group, dstWorkId, physId);
277
+ }
278
+ else {
279
+ // DST dirty, CUR not dirty (the assert is just to visualize the condition).
280
+ ASMJIT_ASSERT(dst.isPhysDirty(group, physId) && !cur.isPhysDirty(group, physId));
281
+
282
+ cur.makeDirty(group, dstWorkId, physId);
283
+ }
284
+ }
285
+
286
+ // Must match now...
287
+ ASMJIT_ASSERT(dst.physToWorkId(group, physId) == cur.physToWorkId(group, physId));
288
+ ASMJIT_ASSERT(dst.isPhysDirty(group, physId) == cur.isPhysDirty(group, physId));
289
+
290
+ runId = -1;
291
+ affectedRegs &= ~physMask;
292
+ }
293
+ }
294
+
295
+ // STEP 3
296
+ // ------
297
+ //
298
+ // - Load registers specified by `willLoadRegs`.
299
+
300
+ {
301
+ Support::BitWordIterator<RegMask> it(willLoadRegs);
302
+ while (it.hasNext()) {
303
+ uint32_t physId = it.next();
304
+
305
+ if (!cur.isPhysAssigned(group, physId)) {
306
+ uint32_t workId = dst.physToWorkId(group, physId);
307
+
308
+ // The algorithm is broken if it tries to load a register that is not in LIVE-IN.
309
+ ASMJIT_ASSERT(liveIn.bitAt(workId) == true);
310
+
311
+ ASMJIT_PROPAGATE(onLoadReg(group, workId, physId));
312
+ if (dst.isPhysDirty(group, physId))
313
+ cur.makeDirty(group, workId, physId);
314
+ ASMJIT_ASSERT(dst.isPhysDirty(group, physId) == cur.isPhysDirty(group, physId));
315
+ }
316
+ else {
317
+ // Not possible otherwise.
318
+ ASMJIT_ASSERT(tryMode == true);
319
+ }
320
+ }
321
+ }
322
+ }
323
+
324
+ if (!tryMode) {
325
+ // Here is code that dumps the conflicting part if something fails here:
326
+ // if (!dst.equals(cur)) {
327
+ // uint32_t physTotal = dst._layout.physTotal;
328
+ // uint32_t workCount = dst._layout.workCount;
329
+ //
330
+ // fprintf(stderr, "Dirty DST=0x%08X CUR=0x%08X\n", dst.dirty(RegGroup::kGp), cur.dirty(RegGroup::kGp));
331
+ // fprintf(stderr, "Assigned DST=0x%08X CUR=0x%08X\n", dst.assigned(RegGroup::kGp), cur.assigned(RegGroup::kGp));
332
+ //
333
+ // for (uint32_t physId = 0; physId < physTotal; physId++) {
334
+ // uint32_t dstWorkId = dst._physToWorkMap->workIds[physId];
335
+ // uint32_t curWorkId = cur._physToWorkMap->workIds[physId];
336
+ // if (dstWorkId != curWorkId)
337
+ // fprintf(stderr, "[PhysIdWork] PhysId=%u WorkId[DST(%u) != CUR(%u)]\n", physId, dstWorkId, curWorkId);
338
+ // }
339
+ //
340
+ // for (uint32_t workId = 0; workId < workCount; workId++) {
341
+ // uint32_t dstPhysId = dst._workToPhysMap->physIds[workId];
342
+ // uint32_t curPhysId = cur._workToPhysMap->physIds[workId];
343
+ // if (dstPhysId != curPhysId)
344
+ // fprintf(stderr, "[WorkToPhys] WorkId=%u PhysId[DST(%u) != CUR(%u)]\n", workId, dstPhysId, curPhysId);
345
+ // }
346
+ // }
347
+ ASMJIT_ASSERT(dst.equals(cur));
348
+ }
349
+
350
+ return kErrorOk;
351
+ }
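A frequent case STEP 2 above has to untangle is two live values that simply trade places between the current and the target assignment. Architectures with a register-swap instruction (hasInstRegSwap) resolve it in one instruction; otherwise the allocator moves one value through a scratch register or, with no scratch available, spills one side. The following sketch shows that decision in isolation; the stub emitters only stand in for onSwapReg/onMoveReg/onSpillReg, and the printed mnemonics are illustrative rather than AsmJit output:

    #include <bit>
    #include <cstdint>
    #include <cstdio>

    static void emitSwap(uint32_t a, uint32_t b)     { std::printf("swap r%u, r%u\n", a, b); }
    static void emitMove(uint32_t dst, uint32_t src) { std::printf("mov  r%u, r%u\n", dst, src); }
    static void emitSpill(uint32_t physId)           { std::printf("spill r%u\n", physId); }

    // Registers `a` and `b` must exchange their contents while switching assignments.
    // `freeRegs` is a mask of registers unused by both the current and target state.
    static void resolveTwoRegCycle(bool hasSwapInst, uint32_t a, uint32_t b, uint32_t freeRegs) {
      if (hasSwapInst) {
        emitSwap(a, b);                                     // One instruction, e.g. x86 `xchg`.
      } else if (freeRegs) {
        uint32_t tmp = uint32_t(std::countr_zero(freeRegs));
        emitMove(tmp, a); emitMove(a, b); emitMove(b, tmp); // Rotate through a scratch register.
      } else {
        emitSpill(a);                                       // No scratch left: spill one side.
      }
    }

    int main() {
      resolveTwoRegCycle(false, 0, 1, 0b0100u); // Prints the three-move sequence via r2.
      return 0;
    }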
352
+
353
+ Error RALocalAllocator::spillScratchGpRegsBeforeEntry(RegMask scratchRegs) noexcept {
354
+ RegGroup group = RegGroup::kGp;
355
+ Support::BitWordIterator<RegMask> it(scratchRegs);
356
+
357
+ while (it.hasNext()) {
358
+ uint32_t physId = it.next();
359
+ if (_curAssignment.isPhysAssigned(group, physId)) {
360
+ uint32_t workId = _curAssignment.physToWorkId(group, physId);
361
+ ASMJIT_PROPAGATE(onSpillReg(group, workId, physId));
362
+ }
363
+ }
364
+
365
+ return kErrorOk;
366
+ }
367
+
368
+ // RALocalAllocator - Allocation
369
+ // =============================
370
+
371
+ Error RALocalAllocator::allocInst(InstNode* node) noexcept {
372
+ RAInst* raInst = node->passData<RAInst>();
373
+
374
+ RATiedReg* outTiedRegs[Globals::kMaxPhysRegs];
375
+ RATiedReg* dupTiedRegs[Globals::kMaxPhysRegs];
376
+ RATiedReg* consecutiveRegs[kMaxConsecutiveRegs];
377
+
378
+ // The cursor must point to the previous instruction for a possible instruction insertion.
379
+ _cc->_setCursor(node->prev());
380
+
381
+ _node = node;
382
+ _raInst = raInst;
383
+ _tiedTotal = raInst->_tiedTotal;
384
+ _tiedCount = raInst->_tiedCount;
385
+
386
+ // Whether we already replaced register operand with memory operand.
387
+ bool rmAllocated = false;
388
+
389
+ for (RegGroup group : RegGroupVirtValues{}) {
390
+ uint32_t i, count = this->tiedCount(group);
391
+ RATiedReg* tiedRegs = this->tiedRegs(group);
392
+
393
+ RegMask willUse = _raInst->_usedRegs[group];
394
+ RegMask willOut = _raInst->_clobberedRegs[group];
395
+ RegMask willFree = 0;
396
+
397
+ uint32_t usePending = count;
398
+ uint32_t outTiedCount = 0;
399
+ uint32_t dupTiedCount = 0;
400
+ uint32_t consecutiveMask = 0;
401
+
402
+ // STEP 1
403
+ // ------
404
+ //
405
+ // Calculate `willUse` and `willFree` masks based on tied registers we have. In addition, aggregate information
406
+ // regarding consecutive registers used by this instruction. We need that to make USE/OUT assignments.
407
+ //
408
+ // We don't do any assignment decisions at this stage as we just need to collect some information first. Then,
409
+ // after we populate all masks needed we can finally make some decisions in the second loop. The main reason
410
+ // for this is that we really need `willFree` to make assignment decisions for `willUse`, because if we mark
411
+ // some registers that will be freed, we can consider them in decision making afterwards.
412
+
413
+ for (i = 0; i < count; i++) {
414
+ RATiedReg* tiedReg = &tiedRegs[i];
415
+
416
+ if (tiedReg->hasAnyConsecutiveFlag()) {
417
+ uint32_t consecutiveOffset = tiedReg->isLeadConsecutive() ? uint32_t(0) : tiedReg->consecutiveData();
418
+
419
+ if (ASMJIT_UNLIKELY(Support::bitTest(consecutiveMask, consecutiveOffset)))
420
+ return DebugUtils::errored(kErrorInvalidState);
421
+
422
+ consecutiveMask |= Support::bitMask(consecutiveOffset);
423
+ consecutiveRegs[consecutiveOffset] = tiedReg;
424
+ }
425
+
426
+ // Add OUT and KILL to `outPending` for CLOBBERing and/or OUT assignment.
427
+ if (tiedReg->isOutOrKill())
428
+ outTiedRegs[outTiedCount++] = tiedReg;
429
+
430
+ if (tiedReg->isDuplicate())
431
+ dupTiedRegs[dupTiedCount++] = tiedReg;
432
+
433
+ if (!tiedReg->isUse()) {
434
+ tiedReg->markUseDone();
435
+ usePending--;
436
+ continue;
437
+ }
438
+
439
+ // Don't assign anything here if this is a consecutive USE - we will handle this in STEP 2 instead.
440
+ if (tiedReg->isUseConsecutive())
441
+ continue;
442
+
443
+ uint32_t workId = tiedReg->workId();
444
+ uint32_t assignedId = _curAssignment.workToPhysId(group, workId);
445
+
446
+ if (tiedReg->hasUseId()) {
447
+ // If the register has `useId` it means it can only be allocated in that register.
448
+ RegMask useMask = Support::bitMask(tiedReg->useId());
449
+
450
+ // RAInstBuilder must have collected `usedRegs` on-the-fly.
451
+ ASMJIT_ASSERT((willUse & useMask) != 0);
452
+
453
+ if (assignedId == tiedReg->useId()) {
454
+ // If the register is already allocated in this one, mark it done and continue.
455
+ tiedReg->markUseDone();
456
+ if (tiedReg->isWrite())
457
+ _curAssignment.makeDirty(group, workId, assignedId);
458
+ usePending--;
459
+ willUse |= useMask;
460
+ }
461
+ else {
462
+ willFree |= useMask & _curAssignment.assigned(group);
463
+ }
464
+ }
465
+ else {
466
+ // Check if the register must be moved to `allocableRegs`.
467
+ RegMask allocableRegs = tiedReg->useRegMask();
468
+ if (assignedId != RAAssignment::kPhysNone) {
469
+ RegMask assignedMask = Support::bitMask(assignedId);
470
+ if ((allocableRegs & ~willUse) & assignedMask) {
471
+ tiedReg->setUseId(assignedId);
472
+ tiedReg->markUseDone();
473
+ if (tiedReg->isWrite())
474
+ _curAssignment.makeDirty(group, workId, assignedId);
475
+ usePending--;
476
+ willUse |= assignedMask;
477
+ }
478
+ else {
479
+ willFree |= assignedMask;
480
+ }
481
+ }
482
+ }
483
+ }
484
+
485
+ // STEP 2
486
+ // ------
487
+ //
488
+ // Verify that all the consecutive registers are really consecutive. Terminate if there is a gap. In addition,
489
+ // decide which USE ids will be used in case that this consecutive sequence is USE (OUT registers are allocated
490
+ // in a different step).
491
+ uint32_t consecutiveCount = 0;
492
+
493
+ if (consecutiveMask) {
494
+ if ((consecutiveMask & (consecutiveMask + 1u)) != 0)
495
+ return DebugUtils::errored(kErrorInvalidState);
496
+
497
+ // The count of trailing ones is the count of consecutive registers; there cannot be a gap.
498
+ consecutiveCount = Support::ctz(~consecutiveMask);
499
+
500
+ // Prioritize an allocation that would result in the fewest moves, even when moving registers away from their homes.
501
+ RATiedReg* lead = consecutiveRegs[0];
502
+
503
+ // Assign the best possible USE Ids to all consecutives.
504
+ if (lead->isUseConsecutive()) {
505
+ uint32_t bestScore = 0;
506
+ uint32_t bestLeadReg = 0xFFFFFFFF;
507
+ RegMask allocableRegs = (_availableRegs[group] | willFree) & ~willUse;
508
+
509
+ uint32_t assignments[kMaxConsecutiveRegs];
510
+
511
+ for (i = 0; i < consecutiveCount; i++)
512
+ assignments[i] = _curAssignment.workToPhysId(group, consecutiveRegs[i]->workId());
513
+
514
+ Support::BitWordIterator<uint32_t> it(lead->useRegMask());
515
+ while (it.hasNext()) {
516
+ uint32_t regIndex = it.next();
517
+ if (Support::bitTest(lead->useRegMask(), regIndex)) {
518
+ uint32_t score = 15;
519
+
520
+ for (i = 0; i < consecutiveCount; i++) {
521
+ uint32_t consecutiveIndex = regIndex + i;
522
+ if (!Support::bitTest(allocableRegs, consecutiveIndex)) {
523
+ score = 0;
524
+ break;
525
+ }
526
+
527
+ RAWorkReg* workReg = workRegById(consecutiveRegs[i]->workId());
528
+ score += uint32_t(workReg->homeRegId() == consecutiveIndex);
529
+ score += uint32_t(assignments[i] == consecutiveIndex) * 2;
530
+ }
531
+
532
+ if (score > bestScore) {
533
+ bestScore = score;
534
+ bestLeadReg = regIndex;
535
+ }
536
+ }
537
+ }
538
+
539
+ if (bestLeadReg == 0xFFFFFFFF)
540
+ return DebugUtils::errored(kErrorConsecutiveRegsAllocation);
541
+
542
+ for (i = 0; i < consecutiveCount; i++) {
543
+ uint32_t consecutiveIndex = bestLeadReg + i;
544
+
545
+ RATiedReg* tiedReg = consecutiveRegs[i];
546
+ RegMask useMask = Support::bitMask(consecutiveIndex);
547
+
548
+ uint32_t workId = tiedReg->workId();
549
+ uint32_t assignedId = _curAssignment.workToPhysId(group, workId);
550
+
551
+ tiedReg->setUseId(consecutiveIndex);
552
+
553
+ if (assignedId == consecutiveIndex) {
554
+ // If the register is already allocated in this one, mark it done and continue.
555
+ tiedReg->markUseDone();
556
+ if (tiedReg->isWrite())
557
+ _curAssignment.makeDirty(group, workId, assignedId);
558
+ usePending--;
559
+ willUse |= useMask;
560
+ }
561
+ else {
562
+ willUse |= useMask;
563
+ willFree |= useMask & _curAssignment.assigned(group);
564
+ }
565
+ }
566
+ }
567
+ }
568
+
569
+ // STEP 3
570
+ // ------
571
+ //
572
+ // Do some decision making to find the best candidates of registers that need to be assigned, moved, and/or
573
+ // spilled. Only USE registers are considered here, OUT will be decided later after all CLOBBERed and OUT
574
+ // registers are unassigned.
575
+
576
+ if (usePending) {
577
+ // TODO: Not sure `liveRegs` should be used, maybe willUse and willFree would be enough and much more clear.
578
+
579
+ // All registers that are currently alive without registers that will be freed.
580
+ RegMask liveRegs = _curAssignment.assigned(group) & ~willFree;
581
+
582
+ for (i = 0; i < count; i++) {
583
+ RATiedReg* tiedReg = &tiedRegs[i];
584
+ if (tiedReg->isUseDone())
585
+ continue;
586
+
587
+ uint32_t workId = tiedReg->workId();
588
+ uint32_t assignedId = _curAssignment.workToPhysId(group, workId);
589
+
590
+ // REG/MEM: Patch register operand to memory operand if not allocated.
591
+ if (!rmAllocated && tiedReg->hasUseRM()) {
592
+ if (assignedId == RAAssignment::kPhysNone && Support::isPowerOf2(tiedReg->useRewriteMask())) {
593
+ RAWorkReg* workReg = workRegById(tiedReg->workId());
594
+ uint32_t opIndex = Support::ctz(tiedReg->useRewriteMask()) / uint32_t(sizeof(Operand) / sizeof(uint32_t));
595
+ uint32_t rmSize = tiedReg->rmSize();
596
+
597
+ if (rmSize <= workReg->virtReg()->virtSize()) {
598
+ Operand& op = node->operands()[opIndex];
599
+ op = _pass->workRegAsMem(workReg);
600
+ op.as<BaseMem>().setSize(rmSize);
601
+ tiedReg->_useRewriteMask = 0;
602
+
603
+ tiedReg->markUseDone();
604
+ usePending--;
605
+
606
+ rmAllocated = true;
607
+ continue;
608
+ }
609
+ }
610
+ }
611
+
612
+ if (!tiedReg->hasUseId()) {
613
+ // DECIDE where to assign the USE register.
614
+ RegMask allocableRegs = tiedReg->useRegMask() & ~(willFree | willUse);
615
+ uint32_t useId = decideOnAssignment(group, workId, assignedId, allocableRegs);
616
+
617
+ RegMask useMask = Support::bitMask(useId);
618
+ willUse |= useMask;
619
+ willFree |= useMask & liveRegs;
620
+ tiedReg->setUseId(useId);
621
+
622
+ if (assignedId != RAAssignment::kPhysNone) {
623
+ RegMask assignedMask = Support::bitMask(assignedId);
624
+
625
+ willFree |= assignedMask;
626
+ liveRegs &= ~assignedMask;
627
+
628
+ // OPTIMIZATION: Assign the USE register here if it's possible.
629
+ if (!(liveRegs & useMask)) {
630
+ ASMJIT_PROPAGATE(onMoveReg(group, workId, useId, assignedId));
631
+ tiedReg->markUseDone();
632
+ if (tiedReg->isWrite())
633
+ _curAssignment.makeDirty(group, workId, useId);
634
+ usePending--;
635
+ }
636
+ }
637
+ else {
638
+ // OPTIMIZATION: Assign the USE register here if it's possible.
639
+ if (!(liveRegs & useMask)) {
640
+ ASMJIT_PROPAGATE(onLoadReg(group, workId, useId));
641
+ tiedReg->markUseDone();
642
+ if (tiedReg->isWrite())
643
+ _curAssignment.makeDirty(group, workId, useId);
644
+ usePending--;
645
+ }
646
+ }
647
+
648
+ liveRegs |= useMask;
649
+ }
650
+ }
651
+ }
652
+
653
+ // Initially all used regs will be marked as clobbered.
654
+ RegMask clobberedByInst = willUse | willOut;
655
+
656
+ // STEP 4
657
+ // ------
658
+ //
659
+ // Free all registers that we marked as `willFree`. Only registers that are not USEd by the instruction are
660
+ // considered as we don't want to free regs we need.
661
+
662
+ if (willFree) {
663
+ RegMask allocableRegs = _availableRegs[group] & ~(_curAssignment.assigned(group) | willFree | willUse | willOut);
664
+ Support::BitWordIterator<RegMask> it(willFree);
665
+
666
+ do {
667
+ uint32_t assignedId = it.next();
668
+ if (_curAssignment.isPhysAssigned(group, assignedId)) {
669
+ uint32_t workId = _curAssignment.physToWorkId(group, assignedId);
670
+
671
+ // DECIDE whether to MOVE or SPILL.
672
+ if (allocableRegs) {
673
+ uint32_t reassignedId = decideOnReassignment(group, workId, assignedId, allocableRegs);
674
+ if (reassignedId != RAAssignment::kPhysNone) {
675
+ ASMJIT_PROPAGATE(onMoveReg(group, workId, reassignedId, assignedId));
676
+ allocableRegs ^= Support::bitMask(reassignedId);
677
+ continue;
678
+ }
679
+ }
680
+
681
+ ASMJIT_PROPAGATE(onSpillReg(group, workId, assignedId));
682
+ }
683
+ } while (it.hasNext());
684
+ }
685
+
686
+ // STEP 5
687
+ // ------
688
+ //
689
+ // ALLOCATE / SHUFFLE all registers that we marked as `willUse` and weren't allocated yet. This is a bit
690
+ // complicated as the allocation is iterative. In some cases we have to wait before allocating a particular
691
+ // physical register as it's still occupied by some other one, which we need to move before we can use it.
692
+ // In this case we skip it and allocate some other register instead (making it free for another iteration).
693
+ //
694
+ // NOTE: Iterations are mostly important for complicated allocations like function calls, where there can
695
+ // be up to N registers used at once. Asm instructions won't run the loop more than once in 99.9% of cases
696
+ // as they use 2..3 registers on average.
697
+
698
+ if (usePending) {
699
+ bool mustSwap = false;
700
+ do {
701
+ uint32_t oldPending = usePending;
702
+
703
+ for (i = 0; i < count; i++) {
704
+ RATiedReg* thisTiedReg = &tiedRegs[i];
705
+ if (thisTiedReg->isUseDone())
706
+ continue;
707
+
708
+ uint32_t thisWorkId = thisTiedReg->workId();
709
+ uint32_t thisPhysId = _curAssignment.workToPhysId(group, thisWorkId);
710
+
711
+ // This would be a bug, fatal one!
712
+ uint32_t targetPhysId = thisTiedReg->useId();
713
+ ASMJIT_ASSERT(targetPhysId != thisPhysId);
714
+
715
+ uint32_t targetWorkId = _curAssignment.physToWorkId(group, targetPhysId);
716
+ if (targetWorkId != RAAssignment::kWorkNone) {
717
+ RAWorkReg* targetWorkReg = workRegById(targetWorkId);
718
+
719
+ // Swapping two registers can solve two allocation tasks by emitting just a single instruction. However,
720
+ // swap is only available on few architectures and it's definitely not available for each register group.
721
+ // Calling `onSwapReg()` before checking these would be fatal.
722
+ if (_archTraits->hasInstRegSwap(group) && thisPhysId != RAAssignment::kPhysNone) {
723
+ ASMJIT_PROPAGATE(onSwapReg(group, thisWorkId, thisPhysId, targetWorkId, targetPhysId));
724
+
725
+ thisTiedReg->markUseDone();
726
+ if (thisTiedReg->isWrite())
727
+ _curAssignment.makeDirty(group, thisWorkId, targetPhysId);
728
+ usePending--;
729
+
730
+ // Double-hit.
731
+ RATiedReg* targetTiedReg = RALocal_findTiedRegByWorkId(tiedRegs, count, targetWorkReg->workId());
732
+ if (targetTiedReg && targetTiedReg->useId() == thisPhysId) {
733
+ targetTiedReg->markUseDone();
734
+ if (targetTiedReg->isWrite())
735
+ _curAssignment.makeDirty(group, targetWorkId, thisPhysId);
736
+ usePending--;
737
+ }
738
+ continue;
739
+ }
740
+
741
+ if (!mustSwap)
742
+ continue;
743
+
744
+ // Only branched here if the previous iteration did nothing. This is essentially a SWAP operation without
745
+ // having a dedicated instruction for that purpose (vector registers, etc). The simplest way to handle
746
+ // such a case is to SPILL the target register.
747
+ ASMJIT_PROPAGATE(onSpillReg(group, targetWorkId, targetPhysId));
748
+ }
749
+
750
+ if (thisPhysId != RAAssignment::kPhysNone) {
751
+ ASMJIT_PROPAGATE(onMoveReg(group, thisWorkId, targetPhysId, thisPhysId));
752
+
753
+ thisTiedReg->markUseDone();
754
+ if (thisTiedReg->isWrite())
755
+ _curAssignment.makeDirty(group, thisWorkId, targetPhysId);
756
+ usePending--;
757
+ }
758
+ else {
759
+ ASMJIT_PROPAGATE(onLoadReg(group, thisWorkId, targetPhysId));
760
+
761
+ thisTiedReg->markUseDone();
762
+ if (thisTiedReg->isWrite())
763
+ _curAssignment.makeDirty(group, thisWorkId, targetPhysId);
764
+ usePending--;
765
+ }
766
+ }
767
+
768
+ mustSwap = (oldPending == usePending);
769
+ } while (usePending);
770
+ }
771
+
772
+ // STEP 6
773
+ // ------
774
+ //
775
+ // KILL registers marked as KILL/OUT.
776
+
777
+ uint32_t outPending = outTiedCount;
778
+ if (outTiedCount) {
779
+ for (i = 0; i < outTiedCount; i++) {
780
+ RATiedReg* tiedReg = outTiedRegs[i];
781
+
782
+ uint32_t workId = tiedReg->workId();
783
+ uint32_t physId = _curAssignment.workToPhysId(group, workId);
784
+
785
+ // Must check if it's allocated as KILL can be related to OUT (like KILL immediately after OUT, which could
786
+ // mean the register is not assigned).
787
+ if (physId != RAAssignment::kPhysNone) {
788
+ ASMJIT_PROPAGATE(onKillReg(group, workId, physId));
789
+ willOut &= ~Support::bitMask(physId);
790
+ }
791
+
792
+ // We still maintain the number of pending registers for OUT assignment. So, if this is only KILL, not OUT, we
793
+ // can safely decrement it.
794
+ outPending -= !tiedReg->isOut();
795
+ }
796
+ }
797
+
798
+ // STEP 7
799
+ // ------
800
+ //
801
+ // SPILL registers that will be CLOBBERed. Since OUT and KILL were already processed this is used mostly to
802
+ // handle function CALLs.
803
+
804
+ if (willOut) {
805
+ Support::BitWordIterator<RegMask> it(willOut);
806
+ do {
807
+ uint32_t physId = it.next();
808
+ uint32_t workId = _curAssignment.physToWorkId(group, physId);
809
+
810
+ if (workId == RAAssignment::kWorkNone)
811
+ continue;
812
+
813
+ ASMJIT_PROPAGATE(onSpillReg(group, workId, physId));
814
+ } while (it.hasNext());
815
+ }
816
+
817
+ // STEP 8
818
+ // ------
819
+ //
820
+ // Duplication.
821
+
822
+ for (i = 0; i < dupTiedCount; i++) {
823
+ RATiedReg* tiedReg = dupTiedRegs[i];
824
+ uint32_t workId = tiedReg->workId();
825
+ uint32_t srcId = tiedReg->useId();
826
+
827
+ Support::BitWordIterator<RegMask> it(tiedReg->useRegMask());
828
+ while (it.hasNext()) {
829
+ uint32_t dstId = it.next();
830
+ if (dstId == srcId)
831
+ continue;
832
+ _pass->emitMove(workId, dstId, srcId);
833
+ }
834
+ }
835
+
836
+ // STEP 9
837
+ // ------
838
+ //
839
+ // Vector registers can be partially clobbered by an invoke - find out whether that's the case and clobber when necessary.
840
+
841
+ if (node->isInvoke() && group == RegGroup::kVec) {
842
+ const InvokeNode* invokeNode = node->as<InvokeNode>();
843
+
844
+ RegMask maybeClobberedRegs = invokeNode->detail().callConv().preservedRegs(group) & _curAssignment.assigned(group);
845
+ if (maybeClobberedRegs) {
846
+ uint32_t saveRestoreVecSize = invokeNode->detail().callConv().saveRestoreRegSize(group);
847
+ Support::BitWordIterator<RegMask> it(maybeClobberedRegs);
848
+
849
+ do {
850
+ uint32_t physId = it.next();
851
+ uint32_t workId = _curAssignment.physToWorkId(group, physId);
852
+
853
+ RAWorkReg* workReg = workRegById(workId);
854
+ uint32_t virtSize = workReg->virtReg()->virtSize();
855
+
856
+ if (virtSize > saveRestoreVecSize) {
857
+ ASMJIT_PROPAGATE(onSpillReg(group, workId, physId));
858
+ }
859
+
860
+ } while (it.hasNext());
861
+ }
862
+ }
863
+
864
+ // STEP 10
865
+ // -------
866
+ //
867
+ // Assign OUT registers.
868
+
869
+ if (outPending) {
870
+ // Live registers. We need a separate mask (outside of `_curAssignment`) to hold these because of KILLed
871
+ // registers. If we KILL a register here it will go out from `_curAssignment`, but we cannot assign to it in
872
+ // here.
873
+ RegMask liveRegs = _curAssignment.assigned(group);
874
+
875
+ // Must avoid as they have already been OUTed (added during the loop).
876
+ RegMask outRegs = 0;
877
+
878
+ // Must avoid as they collide with already allocated ones.
879
+ RegMask avoidRegs = willUse & ~clobberedByInst;
880
+
881
+ // Assign the best possible OUT ids of all consecutives.
882
+ if (consecutiveCount) {
883
+ RATiedReg* lead = consecutiveRegs[0];
884
+ if (lead->isOutConsecutive()) {
885
+ uint32_t bestScore = 0;
886
+ uint32_t bestLeadReg = 0xFFFFFFFF;
887
+ RegMask allocableRegs = _availableRegs[group] & ~(outRegs | avoidRegs);
888
+
889
+ Support::BitWordIterator<uint32_t> it(lead->outRegMask());
890
+ while (it.hasNext()) {
891
+ uint32_t regIndex = it.next();
892
+ if (Support::bitTest(lead->outRegMask(), regIndex)) {
893
+ uint32_t score = 15;
894
+
895
+ for (i = 0; i < consecutiveCount; i++) {
896
+ uint32_t consecutiveIndex = regIndex + i;
897
+ if (!Support::bitTest(allocableRegs, consecutiveIndex)) {
898
+ score = 0;
899
+ break;
900
+ }
901
+
902
+ RAWorkReg* workReg = workRegById(consecutiveRegs[i]->workId());
903
+ score += uint32_t(workReg->homeRegId() == consecutiveIndex);
904
+ }
905
+
906
+ if (score > bestScore) {
907
+ bestScore = score;
908
+ bestLeadReg = regIndex;
909
+ }
910
+ }
911
+ }
912
+
913
+ if (bestLeadReg == 0xFFFFFFFF)
914
+ return DebugUtils::errored(kErrorConsecutiveRegsAllocation);
915
+
916
+ for (i = 0; i < consecutiveCount; i++) {
917
+ uint32_t consecutiveIndex = bestLeadReg + i;
918
+ RATiedReg* tiedReg = consecutiveRegs[i];
919
+ tiedReg->setOutId(consecutiveIndex);
920
+ }
921
+ }
922
+ }
923
+
924
+ // Allocate OUT registers.
925
+ for (i = 0; i < outTiedCount; i++) {
926
+ RATiedReg* tiedReg = outTiedRegs[i];
927
+ if (!tiedReg->isOut())
928
+ continue;
929
+
930
+ uint32_t workId = tiedReg->workId();
931
+ uint32_t assignedId = _curAssignment.workToPhysId(group, workId);
932
+
933
+ if (assignedId != RAAssignment::kPhysNone)
934
+ ASMJIT_PROPAGATE(onKillReg(group, workId, assignedId));
935
+
936
+ uint32_t physId = tiedReg->outId();
937
+ if (physId == RAAssignment::kPhysNone) {
938
+ RegMask allocableRegs = tiedReg->outRegMask() & ~(outRegs | avoidRegs);
939
+
940
+ if (!(allocableRegs & ~liveRegs)) {
941
+ // There are no more registers, decide which one to spill.
942
+ uint32_t spillWorkId;
943
+ physId = decideOnSpillFor(group, workId, allocableRegs & liveRegs, &spillWorkId);
944
+ ASMJIT_PROPAGATE(onSpillReg(group, spillWorkId, physId));
945
+ }
946
+ else {
947
+ physId = decideOnAssignment(group, workId, RAAssignment::kPhysNone, allocableRegs & ~liveRegs);
948
+ }
949
+ }
950
+
951
+ // OUTs are CLOBBERed thus cannot be ASSIGNed right now.
952
+ ASMJIT_ASSERT(!_curAssignment.isPhysAssigned(group, physId));
953
+
954
+ if (!tiedReg->isKill())
955
+ ASMJIT_PROPAGATE(onAssignReg(group, workId, physId, true));
956
+
957
+ tiedReg->setOutId(physId);
958
+ tiedReg->markOutDone();
959
+
960
+ outRegs |= Support::bitMask(physId);
961
+ liveRegs &= ~Support::bitMask(physId);
962
+ outPending--;
963
+ }
964
+
965
+ clobberedByInst |= outRegs;
966
+ ASMJIT_ASSERT(outPending == 0);
967
+ }
968
+
969
+ _clobberedRegs[group] |= clobberedByInst;
970
+ }
971
+
972
+ return kErrorOk;
973
+ }
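STEP 2 and STEP 10 above choose the lead register of a consecutive sequence by scoring every candidate: a candidate is rejected (score 0) as soon as one member of the run is not allocable, and otherwise starts at 15 and gains +1 for each member that would land on its home register, plus +2 (USE path only) for each member already assigned there. A standalone version of that scoring over plain arrays; the signature and names are illustrative, not AsmJit API:

    #include <cstddef>
    #include <cstdint>

    constexpr uint32_t kUnassigned = 0xFFFFFFFFu;

    // Score `leadReg` as the first register of `count` consecutive registers.
    // `allocable` is the mask of usable physical ids; homeIds[i]/assignedIds[i]
    // describe the i-th register of the sequence (kUnassigned when not set).
    uint32_t scoreConsecutiveLead(uint32_t leadReg, size_t count, uint32_t allocable,
                                  const uint32_t* homeIds, const uint32_t* assignedIds) {
      uint32_t score = 15;                              // Base score of a feasible candidate.
      for (size_t i = 0; i < count; i++) {
        uint32_t physId = leadReg + uint32_t(i);
        if (physId >= 32u || !(allocable & (1u << physId)))
          return 0;                                     // Any gap disqualifies this lead.
        score += uint32_t(homeIds[i] == physId);          // +1: home register match.
        score += uint32_t(assignedIds[i] == physId) * 2u; // +2: no move needed at all.
      }
      return score;
    }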
974
+
975
+ Error RALocalAllocator::spillAfterAllocation(InstNode* node) noexcept {
976
+ // This is an experimental feature that spills registers that don't have a home-id and are last used in this basic block.
977
+ // This prevents saving these regs in other basic blocks and then restoring them (mostly relevant for loops).
978
+ RAInst* raInst = node->passData<RAInst>();
979
+ uint32_t count = raInst->tiedCount();
980
+
981
+ for (uint32_t i = 0; i < count; i++) {
982
+ RATiedReg* tiedReg = raInst->tiedAt(i);
983
+ if (tiedReg->isLast()) {
984
+ uint32_t workId = tiedReg->workId();
985
+ RAWorkReg* workReg = workRegById(workId);
986
+ if (!workReg->hasHomeRegId()) {
987
+ RegGroup group = workReg->group();
988
+ uint32_t assignedId = _curAssignment.workToPhysId(group, workId);
989
+ if (assignedId != RAAssignment::kPhysNone) {
990
+ _cc->_setCursor(node);
991
+ ASMJIT_PROPAGATE(onSpillReg(group, workId, assignedId));
992
+ }
993
+ }
994
+ }
995
+ }
996
+
997
+ return kErrorOk;
998
+ }
999
+
1000
+ Error RALocalAllocator::allocBranch(InstNode* node, RABlock* target, RABlock* cont) noexcept {
1001
+ // TODO: This should be used to make the branch allocation better.
1002
+ DebugUtils::unused(cont);
1003
+
1004
+ // The cursor must point to the previous instruction for a possible instruction insertion.
1005
+ _cc->_setCursor(node->prev());
1006
+
1007
+ // Use TryMode of `switchToAssignment()` if possible.
1008
+ if (target->hasEntryAssignment()) {
1009
+ ASMJIT_PROPAGATE(switchToAssignment(target->entryPhysToWorkMap(), target->liveIn(), target->isAllocated(), true));
1010
+ }
1011
+
1012
+ ASMJIT_PROPAGATE(allocInst(node));
1013
+ ASMJIT_PROPAGATE(spillRegsBeforeEntry(target));
1014
+
1015
+ if (target->hasEntryAssignment()) {
1016
+ BaseNode* injectionPoint = _pass->extraBlock()->prev();
1017
+ BaseNode* prevCursor = _cc->setCursor(injectionPoint);
1018
+
1019
+ _tmpAssignment.copyFrom(_curAssignment);
1020
+ ASMJIT_PROPAGATE(switchToAssignment(target->entryPhysToWorkMap(), target->liveIn(), target->isAllocated(), false));
1021
+
1022
+ BaseNode* curCursor = _cc->cursor();
1023
+ if (curCursor != injectionPoint) {
1024
+ // Additional instructions emitted to switch from the current state to the `target` state. This means
1025
+ // that we have to move these instructions into an independent code block and patch the jump location.
1026
+ Operand& targetOp = node->op(node->opCount() - 1);
1027
+ if (ASMJIT_UNLIKELY(!targetOp.isLabel()))
1028
+ return DebugUtils::errored(kErrorInvalidState);
1029
+
1030
+ Label trampoline = _cc->newLabel();
1031
+ Label savedTarget = targetOp.as<Label>();
1032
+
1033
+ // Patch `target` to point to the `trampoline` we just created.
1034
+ targetOp = trampoline;
1035
+
1036
+ // Clear a possible SHORT form as we have no clue now if the SHORT form would be encodable after patching
1037
+ // the target to `trampoline` (X86 specific).
1038
+ node->clearOptions(InstOptions::kShortForm);
1039
+
1040
+ // Finalize the switch assignment sequence.
1041
+ ASMJIT_PROPAGATE(_pass->emitJump(savedTarget));
1042
+ _cc->_setCursor(injectionPoint);
1043
+ _cc->bind(trampoline);
1044
+ }
1045
+
1046
+ _cc->_setCursor(prevCursor);
1047
+ _curAssignment.swap(_tmpAssignment);
1048
+ }
1049
+ else {
1050
+ ASMJIT_PROPAGATE(_pass->setBlockEntryAssignment(target, block(), _curAssignment));
1051
+ }
1052
+
1053
+ return kErrorOk;
1054
+ }
1055
+
1056
+ Error RALocalAllocator::allocJumpTable(InstNode* node, const RABlocks& targets, RABlock* cont) noexcept {
1057
+ // TODO: Do we really need to use `cont`?
1058
+ DebugUtils::unused(cont);
1059
+
1060
+ if (targets.empty())
1061
+ return DebugUtils::errored(kErrorInvalidState);
1062
+
1063
+ // The cursor must point to the previous instruction for a possible instruction insertion.
1064
+ _cc->_setCursor(node->prev());
1065
+
1066
+ // All `targets` should have the same sharedAssignmentId, we just read the first.
1067
+ RABlock* anyTarget = targets[0];
1068
+ if (!anyTarget->hasSharedAssignmentId())
1069
+ return DebugUtils::errored(kErrorInvalidState);
1070
+
1071
+ RASharedAssignment& sharedAssignment = _pass->_sharedAssignments[anyTarget->sharedAssignmentId()];
1072
+
1073
+ ASMJIT_PROPAGATE(allocInst(node));
1074
+
1075
+ if (!sharedAssignment.empty()) {
1076
+ ASMJIT_PROPAGATE(switchToAssignment(
1077
+ sharedAssignment.physToWorkMap(),
1078
+ sharedAssignment.liveIn(),
1079
+ true, // Read-only.
1080
+ false // Try-mode.
1081
+ ));
1082
+ }
1083
+
1084
+ ASMJIT_PROPAGATE(spillRegsBeforeEntry(anyTarget));
1085
+
1086
+ if (sharedAssignment.empty()) {
1087
+ ASMJIT_PROPAGATE(_pass->setBlockEntryAssignment(anyTarget, block(), _curAssignment));
1088
+ }
1089
+
1090
+ return kErrorOk;
1091
+ }
1092
+
1093
+ // RALocalAllocator - Decision Making
1094
+ // ==================================
1095
+
1096
+ uint32_t RALocalAllocator::decideOnAssignment(RegGroup group, uint32_t workId, uint32_t physId, RegMask allocableRegs) const noexcept {
1097
+ ASMJIT_ASSERT(allocableRegs != 0);
1098
+ DebugUtils::unused(group, physId);
1099
+
1100
+ RAWorkReg* workReg = workRegById(workId);
1101
+
1102
+ // Prefer home register id, if possible.
1103
+ if (workReg->hasHomeRegId()) {
1104
+ uint32_t homeId = workReg->homeRegId();
1105
+ if (Support::bitTest(allocableRegs, homeId))
1106
+ return homeId;
1107
+ }
1108
+
1109
+ // Prefer registers used upon block entries.
1110
+ RegMask previouslyAssignedRegs = workReg->allocatedMask();
1111
+ if (allocableRegs & previouslyAssignedRegs)
1112
+ allocableRegs &= previouslyAssignedRegs;
1113
+
1114
+ return Support::ctz(allocableRegs);
1115
+ }
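decideOnAssignment() above applies a three-level preference: the home register if it is still allocable, then any register this virtual register has been assigned to before (which tends to keep block-entry states compatible), and finally the lowest available id. The same policy as a free function over plain masks; chooseRegister and its parameter names are illustrative:

    #include <bit>
    #include <cstdint>

    // `allocable` must be non-zero. `previouslyUsed` is the mask of physical
    // registers this virtual register has already lived in (allocatedMask above).
    uint32_t chooseRegister(uint32_t allocable, bool hasHomeId, uint32_t homeId,
                            uint32_t previouslyUsed) {
      if (hasHomeId && (allocable & (1u << homeId)))
        return homeId;                                // 1) Home register wins.
      if (allocable & previouslyUsed)
        allocable &= previouslyUsed;                  // 2) Stay in registers used before.
      return uint32_t(std::countr_zero(allocable));   // 3) Lowest available id.
    }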
1116
+
1117
+ uint32_t RALocalAllocator::decideOnReassignment(RegGroup group, uint32_t workId, uint32_t physId, RegMask allocableRegs) const noexcept {
1118
+ ASMJIT_ASSERT(allocableRegs != 0);
1119
+ DebugUtils::unused(group, physId);
1120
+
1121
+ RAWorkReg* workReg = workRegById(workId);
1122
+
1123
+ // Prefer allocating back to HomeId, if possible.
1124
+ if (workReg->hasHomeRegId()) {
1125
+ if (Support::bitTest(allocableRegs, workReg->homeRegId()))
1126
+ return workReg->homeRegId();
1127
+ }
1128
+
1129
+ // TODO: [Register Allocator] This could be improved.
1130
+
1131
+ // Decided to SPILL.
1132
+ return RAAssignment::kPhysNone;
1133
+ }
1134
+
1135
+ uint32_t RALocalAllocator::decideOnSpillFor(RegGroup group, uint32_t workId, RegMask spillableRegs, uint32_t* spillWorkId) const noexcept {
1136
+ // May be used in the future to decide which register would be best to spill so `workId` can be assigned.
1137
+ DebugUtils::unused(workId);
1138
+ ASMJIT_ASSERT(spillableRegs != 0);
1139
+
1140
+ Support::BitWordIterator<RegMask> it(spillableRegs);
1141
+ uint32_t bestPhysId = it.next();
1142
+ uint32_t bestWorkId = _curAssignment.physToWorkId(group, bestPhysId);
1143
+
1144
+ // Avoid calculating the cost model if there is only one spillable register.
1145
+ if (it.hasNext()) {
1146
+ uint32_t bestCost = calculateSpillCost(group, bestWorkId, bestPhysId);
1147
+ do {
1148
+ uint32_t localPhysId = it.next();
1149
+ uint32_t localWorkId = _curAssignment.physToWorkId(group, localPhysId);
1150
+ uint32_t localCost = calculateSpillCost(group, localWorkId, localPhysId);
1151
+
1152
+ if (localCost < bestCost) {
1153
+ bestCost = localCost;
1154
+ bestPhysId = localPhysId;
1155
+ bestWorkId = localWorkId;
1156
+ }
1157
+ } while (it.hasNext());
1158
+ }
1159
+
1160
+ *spillWorkId = bestWorkId;
1161
+ return bestPhysId;
1162
+ }
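decideOnSpillFor() above is an arg-min over the spillable mask, with a small shortcut: the first candidate seeds the result and the cost model is consulted only when there is more than one choice. The same pattern in isolation, with a hypothetical costOf callback standing in for calculateSpillCost(); the mask must be non-zero:

    #include <bit>
    #include <cstdint>
    #include <functional>

    // Returns the physical id with the lowest spill cost in `spillableRegs` (non-zero).
    uint32_t pickCheapestSpill(uint32_t spillableRegs,
                               const std::function<uint32_t(uint32_t)>& costOf) {
      uint32_t bestId = uint32_t(std::countr_zero(spillableRegs)); // First candidate.
      uint32_t remaining = spillableRegs & (spillableRegs - 1u);   // Drop the lowest bit.
      if (!remaining)
        return bestId;                        // Single candidate: skip the cost model.

      uint32_t bestCost = costOf(bestId);
      while (remaining) {
        uint32_t id = uint32_t(std::countr_zero(remaining));
        remaining &= remaining - 1u;
        uint32_t cost = costOf(id);
        if (cost < bestCost) { bestCost = cost; bestId = id; }
      }
      return bestId;
    }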
1163
+
1164
+ ASMJIT_END_NAMESPACE
1165
+
1166
+ #endif // !ASMJIT_NO_COMPILER