asmjit 0.2.0 → 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (201)
  1. checksums.yaml +4 -4
  2. data/Gemfile.lock +1 -1
  3. data/asmjit.gemspec +1 -1
  4. data/ext/asmjit/asmjit/.editorconfig +10 -0
  5. data/ext/asmjit/asmjit/.github/FUNDING.yml +1 -0
  6. data/ext/asmjit/asmjit/.github/workflows/build-config.json +47 -0
  7. data/ext/asmjit/asmjit/.github/workflows/build.yml +156 -0
  8. data/ext/asmjit/asmjit/.gitignore +6 -0
  9. data/ext/asmjit/asmjit/CMakeLists.txt +611 -0
  10. data/ext/asmjit/asmjit/LICENSE.md +17 -0
  11. data/ext/asmjit/asmjit/README.md +69 -0
  12. data/ext/asmjit/asmjit/src/asmjit/a64.h +62 -0
  13. data/ext/asmjit/asmjit/src/asmjit/arm/a64archtraits_p.h +81 -0
  14. data/ext/asmjit/asmjit/src/asmjit/arm/a64assembler.cpp +5115 -0
  15. data/ext/asmjit/asmjit/src/asmjit/arm/a64assembler.h +72 -0
  16. data/ext/asmjit/asmjit/src/asmjit/arm/a64builder.cpp +51 -0
  17. data/ext/asmjit/asmjit/src/asmjit/arm/a64builder.h +57 -0
  18. data/ext/asmjit/asmjit/src/asmjit/arm/a64compiler.cpp +60 -0
  19. data/ext/asmjit/asmjit/src/asmjit/arm/a64compiler.h +247 -0
  20. data/ext/asmjit/asmjit/src/asmjit/arm/a64emithelper.cpp +464 -0
  21. data/ext/asmjit/asmjit/src/asmjit/arm/a64emithelper_p.h +50 -0
  22. data/ext/asmjit/asmjit/src/asmjit/arm/a64emitter.h +1228 -0
  23. data/ext/asmjit/asmjit/src/asmjit/arm/a64formatter.cpp +298 -0
  24. data/ext/asmjit/asmjit/src/asmjit/arm/a64formatter_p.h +59 -0
  25. data/ext/asmjit/asmjit/src/asmjit/arm/a64func.cpp +189 -0
  26. data/ext/asmjit/asmjit/src/asmjit/arm/a64func_p.h +33 -0
  27. data/ext/asmjit/asmjit/src/asmjit/arm/a64globals.h +1894 -0
  28. data/ext/asmjit/asmjit/src/asmjit/arm/a64instapi.cpp +278 -0
  29. data/ext/asmjit/asmjit/src/asmjit/arm/a64instapi_p.h +41 -0
  30. data/ext/asmjit/asmjit/src/asmjit/arm/a64instdb.cpp +1957 -0
  31. data/ext/asmjit/asmjit/src/asmjit/arm/a64instdb.h +74 -0
  32. data/ext/asmjit/asmjit/src/asmjit/arm/a64instdb_p.h +876 -0
  33. data/ext/asmjit/asmjit/src/asmjit/arm/a64operand.cpp +85 -0
  34. data/ext/asmjit/asmjit/src/asmjit/arm/a64operand.h +312 -0
  35. data/ext/asmjit/asmjit/src/asmjit/arm/a64rapass.cpp +852 -0
  36. data/ext/asmjit/asmjit/src/asmjit/arm/a64rapass_p.h +105 -0
  37. data/ext/asmjit/asmjit/src/asmjit/arm/a64utils.h +179 -0
  38. data/ext/asmjit/asmjit/src/asmjit/arm/armformatter.cpp +143 -0
  39. data/ext/asmjit/asmjit/src/asmjit/arm/armformatter_p.h +44 -0
  40. data/ext/asmjit/asmjit/src/asmjit/arm/armglobals.h +21 -0
  41. data/ext/asmjit/asmjit/src/asmjit/arm/armoperand.h +621 -0
  42. data/ext/asmjit/asmjit/src/asmjit/arm.h +62 -0
  43. data/ext/asmjit/asmjit/src/asmjit/asmjit-scope-begin.h +17 -0
  44. data/ext/asmjit/asmjit/src/asmjit/asmjit-scope-end.h +9 -0
  45. data/ext/asmjit/asmjit/src/asmjit/asmjit.h +33 -0
  46. data/ext/asmjit/asmjit/src/asmjit/core/api-build_p.h +55 -0
  47. data/ext/asmjit/asmjit/src/asmjit/core/api-config.h +613 -0
  48. data/ext/asmjit/asmjit/src/asmjit/core/archcommons.h +229 -0
  49. data/ext/asmjit/asmjit/src/asmjit/core/archtraits.cpp +160 -0
  50. data/ext/asmjit/asmjit/src/asmjit/core/archtraits.h +290 -0
  51. data/ext/asmjit/asmjit/src/asmjit/core/assembler.cpp +406 -0
  52. data/ext/asmjit/asmjit/src/asmjit/core/assembler.h +129 -0
  53. data/ext/asmjit/asmjit/src/asmjit/core/builder.cpp +889 -0
  54. data/ext/asmjit/asmjit/src/asmjit/core/builder.h +1391 -0
  55. data/ext/asmjit/asmjit/src/asmjit/core/codebuffer.h +113 -0
  56. data/ext/asmjit/asmjit/src/asmjit/core/codeholder.cpp +1149 -0
  57. data/ext/asmjit/asmjit/src/asmjit/core/codeholder.h +1035 -0
  58. data/ext/asmjit/asmjit/src/asmjit/core/codewriter.cpp +175 -0
  59. data/ext/asmjit/asmjit/src/asmjit/core/codewriter_p.h +179 -0
  60. data/ext/asmjit/asmjit/src/asmjit/core/compiler.cpp +582 -0
  61. data/ext/asmjit/asmjit/src/asmjit/core/compiler.h +737 -0
  62. data/ext/asmjit/asmjit/src/asmjit/core/compilerdefs.h +173 -0
  63. data/ext/asmjit/asmjit/src/asmjit/core/constpool.cpp +363 -0
  64. data/ext/asmjit/asmjit/src/asmjit/core/constpool.h +250 -0
  65. data/ext/asmjit/asmjit/src/asmjit/core/cpuinfo.cpp +1162 -0
  66. data/ext/asmjit/asmjit/src/asmjit/core/cpuinfo.h +813 -0
  67. data/ext/asmjit/asmjit/src/asmjit/core/emithelper.cpp +323 -0
  68. data/ext/asmjit/asmjit/src/asmjit/core/emithelper_p.h +58 -0
  69. data/ext/asmjit/asmjit/src/asmjit/core/emitter.cpp +333 -0
  70. data/ext/asmjit/asmjit/src/asmjit/core/emitter.h +741 -0
  71. data/ext/asmjit/asmjit/src/asmjit/core/emitterutils.cpp +129 -0
  72. data/ext/asmjit/asmjit/src/asmjit/core/emitterutils_p.h +89 -0
  73. data/ext/asmjit/asmjit/src/asmjit/core/environment.cpp +46 -0
  74. data/ext/asmjit/asmjit/src/asmjit/core/environment.h +508 -0
  75. data/ext/asmjit/asmjit/src/asmjit/core/errorhandler.cpp +14 -0
  76. data/ext/asmjit/asmjit/src/asmjit/core/errorhandler.h +228 -0
  77. data/ext/asmjit/asmjit/src/asmjit/core/formatter.cpp +584 -0
  78. data/ext/asmjit/asmjit/src/asmjit/core/formatter.h +247 -0
  79. data/ext/asmjit/asmjit/src/asmjit/core/formatter_p.h +34 -0
  80. data/ext/asmjit/asmjit/src/asmjit/core/func.cpp +286 -0
  81. data/ext/asmjit/asmjit/src/asmjit/core/func.h +1445 -0
  82. data/ext/asmjit/asmjit/src/asmjit/core/funcargscontext.cpp +293 -0
  83. data/ext/asmjit/asmjit/src/asmjit/core/funcargscontext_p.h +199 -0
  84. data/ext/asmjit/asmjit/src/asmjit/core/globals.cpp +133 -0
  85. data/ext/asmjit/asmjit/src/asmjit/core/globals.h +393 -0
  86. data/ext/asmjit/asmjit/src/asmjit/core/inst.cpp +113 -0
  87. data/ext/asmjit/asmjit/src/asmjit/core/inst.h +772 -0
  88. data/ext/asmjit/asmjit/src/asmjit/core/jitallocator.cpp +1242 -0
  89. data/ext/asmjit/asmjit/src/asmjit/core/jitallocator.h +261 -0
  90. data/ext/asmjit/asmjit/src/asmjit/core/jitruntime.cpp +80 -0
  91. data/ext/asmjit/asmjit/src/asmjit/core/jitruntime.h +89 -0
  92. data/ext/asmjit/asmjit/src/asmjit/core/logger.cpp +69 -0
  93. data/ext/asmjit/asmjit/src/asmjit/core/logger.h +198 -0
  94. data/ext/asmjit/asmjit/src/asmjit/core/misc_p.h +33 -0
  95. data/ext/asmjit/asmjit/src/asmjit/core/operand.cpp +132 -0
  96. data/ext/asmjit/asmjit/src/asmjit/core/operand.h +1611 -0
  97. data/ext/asmjit/asmjit/src/asmjit/core/osutils.cpp +84 -0
  98. data/ext/asmjit/asmjit/src/asmjit/core/osutils.h +61 -0
  99. data/ext/asmjit/asmjit/src/asmjit/core/osutils_p.h +68 -0
  100. data/ext/asmjit/asmjit/src/asmjit/core/raassignment_p.h +418 -0
  101. data/ext/asmjit/asmjit/src/asmjit/core/rabuilders_p.h +612 -0
  102. data/ext/asmjit/asmjit/src/asmjit/core/radefs_p.h +1204 -0
  103. data/ext/asmjit/asmjit/src/asmjit/core/ralocal.cpp +1166 -0
  104. data/ext/asmjit/asmjit/src/asmjit/core/ralocal_p.h +254 -0
  105. data/ext/asmjit/asmjit/src/asmjit/core/rapass.cpp +1969 -0
  106. data/ext/asmjit/asmjit/src/asmjit/core/rapass_p.h +1183 -0
  107. data/ext/asmjit/asmjit/src/asmjit/core/rastack.cpp +184 -0
  108. data/ext/asmjit/asmjit/src/asmjit/core/rastack_p.h +171 -0
  109. data/ext/asmjit/asmjit/src/asmjit/core/string.cpp +559 -0
  110. data/ext/asmjit/asmjit/src/asmjit/core/string.h +372 -0
  111. data/ext/asmjit/asmjit/src/asmjit/core/support.cpp +494 -0
  112. data/ext/asmjit/asmjit/src/asmjit/core/support.h +1773 -0
  113. data/ext/asmjit/asmjit/src/asmjit/core/target.cpp +14 -0
  114. data/ext/asmjit/asmjit/src/asmjit/core/target.h +53 -0
  115. data/ext/asmjit/asmjit/src/asmjit/core/type.cpp +74 -0
  116. data/ext/asmjit/asmjit/src/asmjit/core/type.h +419 -0
  117. data/ext/asmjit/asmjit/src/asmjit/core/virtmem.cpp +722 -0
  118. data/ext/asmjit/asmjit/src/asmjit/core/virtmem.h +242 -0
  119. data/ext/asmjit/asmjit/src/asmjit/core/zone.cpp +353 -0
  120. data/ext/asmjit/asmjit/src/asmjit/core/zone.h +615 -0
  121. data/ext/asmjit/asmjit/src/asmjit/core/zonehash.cpp +309 -0
  122. data/ext/asmjit/asmjit/src/asmjit/core/zonehash.h +186 -0
  123. data/ext/asmjit/asmjit/src/asmjit/core/zonelist.cpp +163 -0
  124. data/ext/asmjit/asmjit/src/asmjit/core/zonelist.h +209 -0
  125. data/ext/asmjit/asmjit/src/asmjit/core/zonestack.cpp +176 -0
  126. data/ext/asmjit/asmjit/src/asmjit/core/zonestack.h +239 -0
  127. data/ext/asmjit/asmjit/src/asmjit/core/zonestring.h +120 -0
  128. data/ext/asmjit/asmjit/src/asmjit/core/zonetree.cpp +99 -0
  129. data/ext/asmjit/asmjit/src/asmjit/core/zonetree.h +380 -0
  130. data/ext/asmjit/asmjit/src/asmjit/core/zonevector.cpp +356 -0
  131. data/ext/asmjit/asmjit/src/asmjit/core/zonevector.h +690 -0
  132. data/ext/asmjit/asmjit/src/asmjit/core.h +1861 -0
  133. data/ext/asmjit/asmjit/src/asmjit/x86/x86archtraits_p.h +148 -0
  134. data/ext/asmjit/asmjit/src/asmjit/x86/x86assembler.cpp +5110 -0
  135. data/ext/asmjit/asmjit/src/asmjit/x86/x86assembler.h +685 -0
  136. data/ext/asmjit/asmjit/src/asmjit/x86/x86builder.cpp +52 -0
  137. data/ext/asmjit/asmjit/src/asmjit/x86/x86builder.h +351 -0
  138. data/ext/asmjit/asmjit/src/asmjit/x86/x86compiler.cpp +61 -0
  139. data/ext/asmjit/asmjit/src/asmjit/x86/x86compiler.h +721 -0
  140. data/ext/asmjit/asmjit/src/asmjit/x86/x86emithelper.cpp +619 -0
  141. data/ext/asmjit/asmjit/src/asmjit/x86/x86emithelper_p.h +60 -0
  142. data/ext/asmjit/asmjit/src/asmjit/x86/x86emitter.h +4315 -0
  143. data/ext/asmjit/asmjit/src/asmjit/x86/x86formatter.cpp +944 -0
  144. data/ext/asmjit/asmjit/src/asmjit/x86/x86formatter_p.h +58 -0
  145. data/ext/asmjit/asmjit/src/asmjit/x86/x86func.cpp +503 -0
  146. data/ext/asmjit/asmjit/src/asmjit/x86/x86func_p.h +33 -0
  147. data/ext/asmjit/asmjit/src/asmjit/x86/x86globals.h +2169 -0
  148. data/ext/asmjit/asmjit/src/asmjit/x86/x86instapi.cpp +1732 -0
  149. data/ext/asmjit/asmjit/src/asmjit/x86/x86instapi_p.h +41 -0
  150. data/ext/asmjit/asmjit/src/asmjit/x86/x86instdb.cpp +4427 -0
  151. data/ext/asmjit/asmjit/src/asmjit/x86/x86instdb.h +563 -0
  152. data/ext/asmjit/asmjit/src/asmjit/x86/x86instdb_p.h +311 -0
  153. data/ext/asmjit/asmjit/src/asmjit/x86/x86opcode_p.h +436 -0
  154. data/ext/asmjit/asmjit/src/asmjit/x86/x86operand.cpp +231 -0
  155. data/ext/asmjit/asmjit/src/asmjit/x86/x86operand.h +1085 -0
  156. data/ext/asmjit/asmjit/src/asmjit/x86/x86rapass.cpp +1509 -0
  157. data/ext/asmjit/asmjit/src/asmjit/x86/x86rapass_p.h +94 -0
  158. data/ext/asmjit/asmjit/src/asmjit/x86.h +93 -0
  159. data/ext/asmjit/asmjit/src/asmjit.natvis +245 -0
  160. data/ext/asmjit/asmjit/test/asmjit_test_assembler.cpp +84 -0
  161. data/ext/asmjit/asmjit/test/asmjit_test_assembler.h +85 -0
  162. data/ext/asmjit/asmjit/test/asmjit_test_assembler_a64.cpp +4006 -0
  163. data/ext/asmjit/asmjit/test/asmjit_test_assembler_x64.cpp +17833 -0
  164. data/ext/asmjit/asmjit/test/asmjit_test_assembler_x86.cpp +8300 -0
  165. data/ext/asmjit/asmjit/test/asmjit_test_compiler.cpp +253 -0
  166. data/ext/asmjit/asmjit/test/asmjit_test_compiler.h +73 -0
  167. data/ext/asmjit/asmjit/test/asmjit_test_compiler_a64.cpp +690 -0
  168. data/ext/asmjit/asmjit/test/asmjit_test_compiler_x86.cpp +4317 -0
  169. data/ext/asmjit/asmjit/test/asmjit_test_emitters.cpp +197 -0
  170. data/ext/asmjit/asmjit/test/asmjit_test_instinfo.cpp +181 -0
  171. data/ext/asmjit/asmjit/test/asmjit_test_misc.h +257 -0
  172. data/ext/asmjit/asmjit/test/asmjit_test_perf.cpp +62 -0
  173. data/ext/asmjit/asmjit/test/asmjit_test_perf.h +61 -0
  174. data/ext/asmjit/asmjit/test/asmjit_test_perf_a64.cpp +699 -0
  175. data/ext/asmjit/asmjit/test/asmjit_test_perf_x86.cpp +5032 -0
  176. data/ext/asmjit/asmjit/test/asmjit_test_unit.cpp +172 -0
  177. data/ext/asmjit/asmjit/test/asmjit_test_x86_sections.cpp +172 -0
  178. data/ext/asmjit/asmjit/test/asmjitutils.h +38 -0
  179. data/ext/asmjit/asmjit/test/broken.cpp +312 -0
  180. data/ext/asmjit/asmjit/test/broken.h +148 -0
  181. data/ext/asmjit/asmjit/test/cmdline.h +61 -0
  182. data/ext/asmjit/asmjit/test/performancetimer.h +41 -0
  183. data/ext/asmjit/asmjit/tools/configure-makefiles.sh +13 -0
  184. data/ext/asmjit/asmjit/tools/configure-ninja.sh +13 -0
  185. data/ext/asmjit/asmjit/tools/configure-sanitizers.sh +13 -0
  186. data/ext/asmjit/asmjit/tools/configure-vs2019-x64.bat +2 -0
  187. data/ext/asmjit/asmjit/tools/configure-vs2019-x86.bat +2 -0
  188. data/ext/asmjit/asmjit/tools/configure-vs2022-x64.bat +2 -0
  189. data/ext/asmjit/asmjit/tools/configure-vs2022-x86.bat +2 -0
  190. data/ext/asmjit/asmjit/tools/configure-xcode.sh +8 -0
  191. data/ext/asmjit/asmjit/tools/enumgen.js +417 -0
  192. data/ext/asmjit/asmjit/tools/enumgen.sh +3 -0
  193. data/ext/asmjit/asmjit/tools/tablegen-arm.js +365 -0
  194. data/ext/asmjit/asmjit/tools/tablegen-arm.sh +3 -0
  195. data/ext/asmjit/asmjit/tools/tablegen-x86.js +2638 -0
  196. data/ext/asmjit/asmjit/tools/tablegen-x86.sh +3 -0
  197. data/ext/asmjit/asmjit/tools/tablegen.js +947 -0
  198. data/ext/asmjit/asmjit/tools/tablegen.sh +4 -0
  199. data/ext/asmjit/asmjit.cc +18 -0
  200. data/lib/asmjit/version.rb +1 -1
  201. metadata +197 -2
data/ext/asmjit/asmjit/src/asmjit/core/rapass.cpp
@@ -0,0 +1,1969 @@
+ // This file is part of AsmJit project <https://asmjit.com>
+ //
+ // See asmjit.h or LICENSE.md for license and copyright information
+ // SPDX-License-Identifier: Zlib
+
+ #include "../core/api-build_p.h"
+ #ifndef ASMJIT_NO_COMPILER
+
+ #include "../core/formatter.h"
+ #include "../core/ralocal_p.h"
+ #include "../core/rapass_p.h"
+ #include "../core/support.h"
+ #include "../core/type.h"
+ #include "../core/zonestack.h"
+
+ ASMJIT_BEGIN_NAMESPACE
+
+ // RABlock - Control Flow
+ // ======================
+
+ Error RABlock::appendSuccessor(RABlock* successor) noexcept {
+   RABlock* predecessor = this;
+
+   if (predecessor->hasSuccessor(successor))
+     return kErrorOk;
+
+   ASMJIT_PROPAGATE(successor->_predecessors.willGrow(allocator()));
+   ASMJIT_PROPAGATE(predecessor->_successors.willGrow(allocator()));
+
+   predecessor->_successors.appendUnsafe(successor);
+   successor->_predecessors.appendUnsafe(predecessor);
+
+   return kErrorOk;
+ }
+
+ Error RABlock::prependSuccessor(RABlock* successor) noexcept {
+   RABlock* predecessor = this;
+
+   if (predecessor->hasSuccessor(successor))
+     return kErrorOk;
+
+   ASMJIT_PROPAGATE(successor->_predecessors.willGrow(allocator()));
+   ASMJIT_PROPAGATE(predecessor->_successors.willGrow(allocator()));
+
+   predecessor->_successors.prependUnsafe(successor);
+   successor->_predecessors.prependUnsafe(predecessor);
+
+   return kErrorOk;
+ }
+
+ // BaseRAPass - Construction & Destruction
+ // =======================================
+
+ BaseRAPass::BaseRAPass() noexcept : FuncPass("BaseRAPass") {}
+ BaseRAPass::~BaseRAPass() noexcept {}
+
+ // BaseRAPass - RunOnFunction
+ // ==========================
+
+ static void BaseRAPass_reset(BaseRAPass* self, FuncDetail* funcDetail) noexcept {
+   ZoneAllocator* allocator = self->allocator();
+
+   self->_blocks.reset();
+   self->_exits.reset();
+   self->_pov.reset();
+   self->_workRegs.reset();
+   self->_instructionCount = 0;
+   self->_createdBlockCount = 0;
+
+   self->_sharedAssignments.reset();
+   self->_lastTimestamp = 0;
+
+   self->_archTraits = nullptr;
+   self->_physRegIndex.reset();
+   self->_physRegCount.reset();
+   self->_physRegTotal = 0;
+   self->_scratchRegIndexes.fill(BaseReg::kIdBad);
+
+   self->_availableRegs.reset();
+   self->_availableRegCount.reset();
+   self->_clobberedRegs.reset();
+
+   self->_workRegs.reset();
+   self->_workRegsOfGroup.forEach([](RAWorkRegs& regs) { regs.reset(); });
+   self->_strategy.forEach([](RAStrategy& strategy) { strategy.reset(); });
+   self->_globalLiveSpans.fill(nullptr);
+   self->_globalMaxLiveCount.reset();
+   self->_temporaryMem.reset();
+
+   self->_stackAllocator.reset(allocator);
+   self->_argsAssignment.reset(funcDetail);
+   self->_numStackArgsToStackSlots = 0;
+   self->_maxWorkRegNameSize = 0;
+ }
+
+ static void BaseRAPass_resetVirtRegData(BaseRAPass* self) noexcept {
+   for (RAWorkReg* wReg : self->_workRegs) {
+     VirtReg* vReg = wReg->virtReg();
+
+     // Update the information regarding the stack of the virtual register.
+     if (wReg->hasStackSlot()) {
+       RAStackSlot* slot = wReg->stackSlot();
+       vReg->assignStackSlot(slot->offset());
+     }
+
+     // Reset work reg association so it cannot be used by accident (RAWorkReg data will be destroyed).
+     vReg->_workReg = nullptr;
+   }
+ }
+
+ Error BaseRAPass::runOnFunction(Zone* zone, Logger* logger, FuncNode* func) {
+   _allocator.reset(zone);
+
+ #ifndef ASMJIT_NO_LOGGING
+   _logger = logger;
+   _formatOptions.reset();
+   _diagnosticOptions = DiagnosticOptions::kNone;
+
+   if (logger) {
+     _formatOptions = logger->options();
+     _diagnosticOptions = _cb->diagnosticOptions();
+   }
+ #else
+   DebugUtils::unused(logger);
+ #endif
+
+   // Initialize all core structures to use `zone` and `func`.
+   BaseNode* end = func->endNode();
+   _func = func;
+   _stop = end->next();
+   _extraBlock = end;
+
+   BaseRAPass_reset(this, &_func->_funcDetail);
+
+   // Initialize architecture-specific members.
+   onInit();
+
+   // Perform all allocation steps required.
+   Error err = onPerformAllSteps();
+
+   // Must be called regardless of the allocation status.
+   onDone();
+
+   // Reset possible connections introduced by the register allocator.
+   BaseRAPass_resetVirtRegData(this);
+
+   // Reset all core structures and everything that depends on the passed `Zone`.
+   BaseRAPass_reset(this, nullptr);
+   _allocator.reset(nullptr);
+
+ #ifndef ASMJIT_NO_LOGGING
+   _logger = nullptr;
+   _formatOptions.reset();
+   _diagnosticOptions = DiagnosticOptions::kNone;
+ #endif
+
+   _func = nullptr;
+   _stop = nullptr;
+   _extraBlock = nullptr;
+
+   // Reset `Zone` as nothing should persist between `runOnFunction()` calls.
+   zone->reset();
+
+   // We alter the compiler cursor, because it doesn't make sense to reference it after the compilation - some
+   // nodes may disappear and the old cursor can go out anyway.
+   cc()->_setCursor(cc()->lastNode());
+
+   return err;
+ }
+
+ Error BaseRAPass::onPerformAllSteps() noexcept {
+   ASMJIT_PROPAGATE(buildCFG());
+   ASMJIT_PROPAGATE(buildCFGViews());
+   ASMJIT_PROPAGATE(removeUnreachableCode());
+
+   ASMJIT_PROPAGATE(buildCFGDominators());
+   ASMJIT_PROPAGATE(buildLiveness());
+   ASMJIT_PROPAGATE(assignArgIndexToWorkRegs());
+
+ #ifndef ASMJIT_NO_LOGGING
+   if (hasDiagnosticOption(DiagnosticOptions::kRAAnnotate))
+     ASMJIT_PROPAGATE(annotateCode());
+ #endif
+
+   ASMJIT_PROPAGATE(runGlobalAllocator());
+   ASMJIT_PROPAGATE(runLocalAllocator());
+
+   ASMJIT_PROPAGATE(updateStackFrame());
+   ASMJIT_PROPAGATE(insertPrologEpilog());
+
+   ASMJIT_PROPAGATE(rewrite());
+
+   return kErrorOk;
+ }
+
+ // BaseRAPass - CFG - Basic Block Management
+ // =========================================
+
+ RABlock* BaseRAPass::newBlock(BaseNode* initialNode) noexcept {
+   RABlock* block = zone()->newT<RABlock>(this);
+   if (ASMJIT_UNLIKELY(!block))
+     return nullptr;
+
+   block->setFirst(initialNode);
+   block->setLast(initialNode);
+
+   _createdBlockCount++;
+   return block;
+ }
+
+ RABlock* BaseRAPass::newBlockOrExistingAt(LabelNode* cbLabel, BaseNode** stoppedAt) noexcept {
+   if (cbLabel->hasPassData())
+     return cbLabel->passData<RABlock>();
+
+   FuncNode* func = this->func();
+   BaseNode* node = cbLabel->prev();
+   RABlock* block = nullptr;
+
+   // Try to find some label, but terminate the loop on any code. We try hard to coalesce code that contains two
+   // consecutive labels or a combination of non-code nodes between 2 or more labels.
+   //
+   // Possible cases that would share the same basic block:
+   //
+   //   1. Two or more consecutive labels:
+   //     Label1:
+   //     Label2:
+   //
+   //   2. Two or more labels separated by non-code nodes:
+   //     Label1:
+   //     ; Some comment...
+   //     .align 16
+   //     Label2:
+   size_t nPendingLabels = 0;
+
+   while (node) {
+     if (node->type() == NodeType::kLabel) {
+       // Function has a different NodeType, just make sure this was not messed up as we must never associate
+       // BasicBlock with a `func` itself.
+       ASMJIT_ASSERT(node != func);
+
+       block = node->passData<RABlock>();
+       if (block) {
+         // Exit node has always a block associated with it. If we went here it means that `cbLabel` passed here
+         // is after the end of the function and cannot be merged with the function exit block.
+         if (node == func->exitNode())
+           block = nullptr;
+         break;
+       }
+
+       nPendingLabels++;
+     }
+     else if (node->type() == NodeType::kAlign) {
+       // Align node is fine.
+     }
+     else {
+       break;
+     }
+
+     node = node->prev();
+   }
+
+   if (stoppedAt)
+     *stoppedAt = node;
+
+   if (!block) {
+     block = newBlock();
+     if (ASMJIT_UNLIKELY(!block))
+       return nullptr;
+   }
+
+   cbLabel->setPassData<RABlock>(block);
+   node = cbLabel;
+
+   while (nPendingLabels) {
+     node = node->prev();
+     for (;;) {
+       if (node->type() == NodeType::kLabel) {
+         node->setPassData<RABlock>(block);
+         nPendingLabels--;
+         break;
+       }
+
+       node = node->prev();
+       ASMJIT_ASSERT(node != nullptr);
+     }
+   }
+
+   if (!block->first()) {
+     block->setFirst(node);
+     block->setLast(cbLabel);
+   }
+
+   return block;
+ }
+
+ Error BaseRAPass::addBlock(RABlock* block) noexcept {
+   ASMJIT_PROPAGATE(_blocks.willGrow(allocator()));
+
+   block->_blockId = blockCount();
+   _blocks.appendUnsafe(block);
+   return kErrorOk;
+ }
+
+ // BaseRAPass - CFG - Build
+ // ========================
+
+ Error BaseRAPass::initSharedAssignments(const ZoneVector<uint32_t>& sharedAssignmentsMap) noexcept {
+   if (sharedAssignmentsMap.empty())
+     return kErrorOk;
+
+   uint32_t count = 0;
+   for (RABlock* block : _blocks) {
+     if (block->hasSharedAssignmentId()) {
+       uint32_t sharedAssignmentId = sharedAssignmentsMap[block->sharedAssignmentId()];
+       block->setSharedAssignmentId(sharedAssignmentId);
+       count = Support::max(count, sharedAssignmentId + 1);
+     }
+   }
+
+   ASMJIT_PROPAGATE(_sharedAssignments.resize(allocator(), count));
+
+   // Aggregate all entry scratch GP regs from blocks of the same assignment to the assignment itself. It will then be
+   // used instead of RABlock's own scratch regs mask, as shared assignments have precedence.
+   for (RABlock* block : _blocks) {
+     if (block->hasJumpTable()) {
+       const RABlocks& successors = block->successors();
+       if (!successors.empty()) {
+         RABlock* firstSuccessor = successors[0];
+         // NOTE: Shared assignments connect all possible successors so we only need the first to propagate exit scratch
+         // GP registers.
+         ASMJIT_ASSERT(firstSuccessor->hasSharedAssignmentId());
+         RASharedAssignment& sa = _sharedAssignments[firstSuccessor->sharedAssignmentId()];
+         sa.addEntryScratchGpRegs(block->exitScratchGpRegs());
+       }
+     }
+     if (block->hasSharedAssignmentId()) {
+       RASharedAssignment& sa = _sharedAssignments[block->sharedAssignmentId()];
+       sa.addEntryScratchGpRegs(block->_entryScratchGpRegs);
+     }
+   }
+
+   return kErrorOk;
+ }
+
+ // BaseRAPass - CFG - Views Order
+ // ==============================
+
+ class RABlockVisitItem {
+ public:
+   inline RABlockVisitItem(RABlock* block, uint32_t index) noexcept
+     : _block(block),
+       _index(index) {}
+
+   inline RABlockVisitItem(const RABlockVisitItem& other) noexcept
+     : _block(other._block),
+       _index(other._index) {}
+
+   inline RABlockVisitItem& operator=(const RABlockVisitItem& other) noexcept = default;
+
+   inline RABlock* block() const noexcept { return _block; }
+   inline uint32_t index() const noexcept { return _index; }
+
+   RABlock* _block;
+   uint32_t _index;
+ };
+
+ Error BaseRAPass::buildCFGViews() noexcept {
+ #ifndef ASMJIT_NO_LOGGING
+   Logger* logger = getLoggerIf(DiagnosticOptions::kRADebugCFG);
+   ASMJIT_RA_LOG_FORMAT("[BuildCFGViews]\n");
+ #endif
+
+   uint32_t count = blockCount();
+   if (ASMJIT_UNLIKELY(!count)) return kErrorOk;
+
+   ASMJIT_PROPAGATE(_pov.reserve(allocator(), count));
+
+   ZoneStack<RABlockVisitItem> stack;
+   ASMJIT_PROPAGATE(stack.init(allocator()));
+
+   ZoneBitVector visited;
+   ASMJIT_PROPAGATE(visited.resize(allocator(), count));
+
+   RABlock* current = _blocks[0];
+   uint32_t i = 0;
+
+   for (;;) {
+     for (;;) {
+       if (i >= current->successors().size())
+         break;
+
+       // Skip if already visited.
+       RABlock* child = current->successors()[i++];
+       if (visited.bitAt(child->blockId()))
+         continue;
+
+       // Mark as visited to prevent visiting the same block multiple times.
+       visited.setBit(child->blockId(), true);
+
+       // Add the current block on the stack, we will get back to it later.
+       ASMJIT_PROPAGATE(stack.append(RABlockVisitItem(current, i)));
+       current = child;
+       i = 0;
+     }
+
+     current->makeReachable();
+     current->_povOrder = _pov.size();
+     _pov.appendUnsafe(current);
+
+     if (stack.empty())
+       break;
+
+     RABlockVisitItem top = stack.pop();
+     current = top.block();
+     i = top.index();
+   }
+
+   ASMJIT_RA_LOG_COMPLEX({
+     StringTmp<1024> sb;
+     for (RABlock* block : blocks()) {
+       sb.clear();
+       if (block->hasSuccessors()) {
+         sb.appendFormat(" #%u -> {", block->blockId());
+         _dumpBlockIds(sb, block->successors());
+         sb.append("}\n");
+       }
+       else {
+         sb.appendFormat(" #%u -> {Exit}\n", block->blockId());
+       }
+       logger->log(sb);
+     }
+   });
+
+   visited.release(allocator());
+   return kErrorOk;
+ }
+
+ // BaseRAPass - CFG - Dominators
+ // =============================
+
+ static ASMJIT_FORCE_INLINE RABlock* intersectBlocks(RABlock* b1, RABlock* b2) noexcept {
+   while (b1 != b2) {
+     while (b2->povOrder() > b1->povOrder()) b1 = b1->iDom();
+     while (b1->povOrder() > b2->povOrder()) b2 = b2->iDom();
+   }
+   return b1;
+ }
+
+ // Based on "A Simple, Fast Dominance Algorithm".
+ Error BaseRAPass::buildCFGDominators() noexcept {
+ #ifndef ASMJIT_NO_LOGGING
+   Logger* logger = getLoggerIf(DiagnosticOptions::kRADebugCFG);
+   ASMJIT_RA_LOG_FORMAT("[BuildCFGDominators]\n");
+ #endif
+
+   if (_blocks.empty())
+     return kErrorOk;
+
+   RABlock* entryBlock = this->entryBlock();
+   entryBlock->setIDom(entryBlock);
+
+   bool changed = true;
+   uint32_t nIters = 0;
+
+   while (changed) {
+     nIters++;
+     changed = false;
+
+     uint32_t i = _pov.size();
+     while (i) {
+       RABlock* block = _pov[--i];
+       if (block == entryBlock)
+         continue;
+
+       RABlock* iDom = nullptr;
+       const RABlocks& preds = block->predecessors();
+
+       uint32_t j = preds.size();
+       while (j) {
+         RABlock* p = preds[--j];
+         if (!p->iDom())
+           continue;
+         iDom = !iDom ? p : intersectBlocks(iDom, p);
+       }
+
+       if (block->iDom() != iDom) {
+         ASMJIT_ASSUME(iDom != nullptr);
+         ASMJIT_RA_LOG_FORMAT(" IDom of #%u -> #%u\n", block->blockId(), iDom->blockId());
+         block->setIDom(iDom);
+         changed = true;
+       }
+     }
+   }
+
+   ASMJIT_RA_LOG_FORMAT(" Done (%u iterations)\n", nIters);
+   return kErrorOk;
+ }
+
+ bool BaseRAPass::_strictlyDominates(const RABlock* a, const RABlock* b) const noexcept {
+   ASMJIT_ASSERT(a != nullptr); // There must be at least one block if this function is
+   ASMJIT_ASSERT(b != nullptr); // called, as both `a` and `b` must be valid blocks.
+   ASMJIT_ASSERT(a != b); // Checked by `dominates()` and `strictlyDominates()`.
+
+   // Nothing strictly dominates the entry block.
+   const RABlock* entryBlock = this->entryBlock();
+   if (a == entryBlock)
+     return false;
+
+   const RABlock* iDom = b->iDom();
+   while (iDom != a && iDom != entryBlock)
+     iDom = iDom->iDom();
+
+   return iDom != entryBlock;
+ }
+
+ const RABlock* BaseRAPass::_nearestCommonDominator(const RABlock* a, const RABlock* b) const noexcept {
+   ASMJIT_ASSERT(a != nullptr); // There must be at least one block if this function is
+   ASMJIT_ASSERT(b != nullptr); // called, as both `a` and `b` must be valid blocks.
+   ASMJIT_ASSERT(a != b); // Checked by `dominates()` and `properlyDominates()`.
+
+   if (a == b)
+     return a;
+
+   // If `a` strictly dominates `b` then `a` is the nearest common dominator.
+   if (_strictlyDominates(a, b))
+     return a;
+
+   // If `b` strictly dominates `a` then `b` is the nearest common dominator.
+   if (_strictlyDominates(b, a))
+     return b;
+
+   const RABlock* entryBlock = this->entryBlock();
+   uint64_t timestamp = nextTimestamp();
+
+   // Mark all A's dominators.
+   const RABlock* block = a->iDom();
+   while (block != entryBlock) {
+     block->setTimestamp(timestamp);
+     block = block->iDom();
+   }
+
+   // Check all B's dominators against marked dominators of A.
+   block = b->iDom();
+   while (block != entryBlock) {
+     if (block->hasTimestamp(timestamp))
+       return block;
+     block = block->iDom();
+   }
+
+   return entryBlock;
+ }
+
+ // BaseRAPass - CFG - Utilities
+ // ============================
+
+ Error BaseRAPass::removeUnreachableCode() noexcept {
+   uint32_t numAllBlocks = blockCount();
+   uint32_t numReachableBlocks = reachableBlockCount();
+
+   // All reachable -> nothing to do.
+   if (numAllBlocks == numReachableBlocks)
+     return kErrorOk;
+
+ #ifndef ASMJIT_NO_LOGGING
+   StringTmp<256> sb;
+   Logger* logger = getLoggerIf(DiagnosticOptions::kRADebugUnreachable);
+   ASMJIT_RA_LOG_FORMAT("[RemoveUnreachableCode - detected %u of %u unreachable blocks]\n", numAllBlocks - numReachableBlocks, numAllBlocks);
+ #endif
+
+   for (uint32_t i = 0; i < numAllBlocks; i++) {
+     RABlock* block = _blocks[i];
+     if (block->isReachable())
+       continue;
+
+     ASMJIT_RA_LOG_FORMAT(" Removing code from unreachable block {%u}\n", i);
+     BaseNode* first = block->first();
+     BaseNode* last = block->last();
+
+     BaseNode* beforeFirst = first->prev();
+     BaseNode* afterLast = last->next();
+
+     BaseNode* node = first;
+     while (node != afterLast) {
+       BaseNode* next = node->next();
+
+       if (node->isCode() || node->isRemovable()) {
+ #ifndef ASMJIT_NO_LOGGING
+         if (logger) {
+           sb.clear();
+           Formatter::formatNode(sb, _formatOptions, cc(), node);
+           logger->logf(" %s\n", sb.data());
+         }
+ #endif
+         cc()->removeNode(node);
+       }
+       node = next;
+     }
+
+     if (beforeFirst->next() == afterLast) {
+       block->setFirst(nullptr);
+       block->setLast(nullptr);
+     }
+     else {
+       block->setFirst(beforeFirst->next());
+       block->setLast(afterLast->prev());
+     }
+   }
+
+   return kErrorOk;
+ }
+
+ BaseNode* BaseRAPass::findSuccessorStartingAt(BaseNode* node) noexcept {
+   while (node && (node->isInformative() || node->hasNoEffect()))
+     node = node->next();
+   return node;
+ }
+
+ bool BaseRAPass::isNextTo(BaseNode* node, BaseNode* target) noexcept {
+   for (;;) {
+     node = node->next();
+     if (node == target)
+       return true;
+
+     if (!node)
+       return false;
+
+     if (node->isCode() || node->isData())
+       return false;
+   }
+ }
+
+ // BaseRAPass - Registers - VirtReg / WorkReg Mapping
+ // ==================================================
+
+ Error BaseRAPass::_asWorkReg(VirtReg* vReg, RAWorkReg** out) noexcept {
+   // Checked by `asWorkReg()` - must be true.
+   ASMJIT_ASSERT(vReg->_workReg == nullptr);
+
+   RegGroup group = vReg->group();
+   ASMJIT_ASSERT(group <= RegGroup::kMaxVirt);
+
+   RAWorkRegs& wRegs = workRegs();
+   RAWorkRegs& wRegsByGroup = workRegs(group);
+
+   ASMJIT_PROPAGATE(wRegs.willGrow(allocator()));
+   ASMJIT_PROPAGATE(wRegsByGroup.willGrow(allocator()));
+
+   RAWorkReg* wReg = zone()->newT<RAWorkReg>(vReg, wRegs.size());
+   if (ASMJIT_UNLIKELY(!wReg))
+     return DebugUtils::errored(kErrorOutOfMemory);
+
+   vReg->setWorkReg(wReg);
+   if (!vReg->isStack())
+     wReg->setRegByteMask(Support::lsbMask<uint64_t>(vReg->virtSize()));
+   wRegs.appendUnsafe(wReg);
+   wRegsByGroup.appendUnsafe(wReg);
+
+   // Only used by RA logging.
+   _maxWorkRegNameSize = Support::max(_maxWorkRegNameSize, vReg->nameSize());
+
+   *out = wReg;
+   return kErrorOk;
+ }
+
+ RAAssignment::WorkToPhysMap* BaseRAPass::newWorkToPhysMap() noexcept {
+   uint32_t count = workRegCount();
+   size_t size = WorkToPhysMap::sizeOf(count);
+
+   // If no registers are used it could be zero, in that case return a dummy
+   // map instead of NULL.
+   if (ASMJIT_UNLIKELY(!size)) {
+     static const RAAssignment::WorkToPhysMap nullMap = {{ 0 }};
+     return const_cast<RAAssignment::WorkToPhysMap*>(&nullMap);
+   }
+
+   WorkToPhysMap* map = zone()->allocT<WorkToPhysMap>(size);
+   if (ASMJIT_UNLIKELY(!map))
+     return nullptr;
+
+   map->reset(count);
+   return map;
+ }
+
+ RAAssignment::PhysToWorkMap* BaseRAPass::newPhysToWorkMap() noexcept {
+   uint32_t count = physRegTotal();
+   size_t size = PhysToWorkMap::sizeOf(count);
+
+   PhysToWorkMap* map = zone()->allocT<PhysToWorkMap>(size);
+   if (ASMJIT_UNLIKELY(!map))
+     return nullptr;
+
+   map->reset(count);
+   return map;
+ }
+
+ // BaseRAPass - Registers - Liveness Analysis and Statistics
+ // =========================================================
+
+ namespace LiveOps {
+   typedef ZoneBitVector::BitWord BitWord;
+
+   struct In {
+     static ASMJIT_FORCE_INLINE BitWord op(BitWord dst, BitWord out, BitWord gen, BitWord kill) noexcept {
+       DebugUtils::unused(dst);
+       return (out | gen) & ~kill;
+     }
+   };
+
+   template<typename Operator>
+   static ASMJIT_FORCE_INLINE bool op(BitWord* dst, const BitWord* a, uint32_t n) noexcept {
+     BitWord changed = 0;
+
+     for (uint32_t i = 0; i < n; i++) {
+       BitWord before = dst[i];
+       BitWord after = Operator::op(before, a[i]);
+
+       dst[i] = after;
+       changed |= (before ^ after);
+     }
+
+     return changed != 0;
+   }
+
+   template<typename Operator>
+   static ASMJIT_FORCE_INLINE bool op(BitWord* dst, const BitWord* a, const BitWord* b, uint32_t n) noexcept {
+     BitWord changed = 0;
+
+     for (uint32_t i = 0; i < n; i++) {
+       BitWord before = dst[i];
+       BitWord after = Operator::op(before, a[i], b[i]);
+
+       dst[i] = after;
+       changed |= (before ^ after);
+     }
+
+     return changed != 0;
+   }
+
+   template<typename Operator>
+   static ASMJIT_FORCE_INLINE bool op(BitWord* dst, const BitWord* a, const BitWord* b, const BitWord* c, uint32_t n) noexcept {
+     BitWord changed = 0;
+
+     for (uint32_t i = 0; i < n; i++) {
+       BitWord before = dst[i];
+       BitWord after = Operator::op(before, a[i], b[i], c[i]);
+
+       dst[i] = after;
+       changed |= (before ^ after);
+     }
+
+     return changed != 0;
+   }
+
+   static ASMJIT_FORCE_INLINE bool recalcInOut(RABlock* block, uint32_t numBitWords, bool initial = false) noexcept {
+     bool changed = initial;
+
+     const RABlocks& successors = block->successors();
+     uint32_t numSuccessors = successors.size();
+
+     // Calculate `OUT` based on `IN` of all successors.
+     for (uint32_t i = 0; i < numSuccessors; i++)
+       changed |= op<Support::Or>(block->liveOut().data(), successors[i]->liveIn().data(), numBitWords);
+
+     // Calculate `IN` based on `OUT`, `GEN`, and `KILL` bits.
+     if (changed)
+       changed = op<In>(block->liveIn().data(), block->liveOut().data(), block->gen().data(), block->kill().data(), numBitWords);
+
+     return changed;
+   }
+ }
+
+ ASMJIT_FAVOR_SPEED Error BaseRAPass::buildLiveness() noexcept {
+ #ifndef ASMJIT_NO_LOGGING
+   Logger* logger = getLoggerIf(DiagnosticOptions::kRADebugLiveness);
+   StringTmp<512> sb;
+ #endif
+
+   ASMJIT_RA_LOG_FORMAT("[BuildLiveness]\n");
+
+   uint32_t i;
+
+   uint32_t numAllBlocks = blockCount();
+   uint32_t numReachableBlocks = reachableBlockCount();
+
+   uint32_t numVisits = numReachableBlocks;
+   uint32_t numWorkRegs = workRegCount();
+   uint32_t numBitWords = ZoneBitVector::_wordsPerBits(numWorkRegs);
+
+   if (!numWorkRegs) {
+     ASMJIT_RA_LOG_FORMAT(" Done (no virtual registers)\n");
+     return kErrorOk;
+   }
+
+   ZoneVector<uint32_t> nUsesPerWorkReg; // Number of USEs of each RAWorkReg.
+   ZoneVector<uint32_t> nOutsPerWorkReg; // Number of OUTs of each RAWorkReg.
+   ZoneVector<uint32_t> nInstsPerBlock; // Number of instructions of each RABlock.
+
+   ASMJIT_PROPAGATE(nUsesPerWorkReg.resize(allocator(), numWorkRegs));
+   ASMJIT_PROPAGATE(nOutsPerWorkReg.resize(allocator(), numWorkRegs));
+   ASMJIT_PROPAGATE(nInstsPerBlock.resize(allocator(), numAllBlocks));
+
+   // Calculate GEN/KILL of Each Block
+   // --------------------------------
+
+   for (i = 0; i < numReachableBlocks; i++) {
+     RABlock* block = _pov[i];
+     ASMJIT_PROPAGATE(block->resizeLiveBits(numWorkRegs));
+
+     BaseNode* node = block->last();
+     BaseNode* stop = block->first();
+
+     uint32_t nInsts = 0;
+     for (;;) {
+       if (node->isInst()) {
+         InstNode* inst = node->as<InstNode>();
+         RAInst* raInst = inst->passData<RAInst>();
+         ASMJIT_ASSERT(raInst != nullptr);
+
+         RATiedReg* tiedRegs = raInst->tiedRegs();
+         uint32_t count = raInst->tiedCount();
+
+         for (uint32_t j = 0; j < count; j++) {
+           RATiedReg* tiedReg = &tiedRegs[j];
+           uint32_t workId = tiedReg->workId();
+
+           // Update `nUses` and `nOuts`.
+           nUsesPerWorkReg[workId] += 1u;
+           nOutsPerWorkReg[workId] += uint32_t(tiedReg->isWrite());
+
+           // Mark as:
+           //   KILL - if this VirtReg is killed afterwards.
+           //   LAST - if this VirtReg is last in this basic block.
+           if (block->kill().bitAt(workId))
+             tiedReg->addFlags(RATiedFlags::kKill);
+           else if (!block->gen().bitAt(workId))
+             tiedReg->addFlags(RATiedFlags::kLast);
+
+           if (tiedReg->isWriteOnly()) {
+             // KILL.
+             block->kill().setBit(workId, true);
+           }
+           else {
+             // GEN.
+             block->kill().setBit(workId, false);
+             block->gen().setBit(workId, true);
+           }
+
+           if (tiedReg->isLeadConsecutive()) {
+             RAWorkReg* workReg = workRegById(workId);
+             workReg->markLeadConsecutive();
+           }
+
+           if (tiedReg->hasConsecutiveParent()) {
+             RAWorkReg* consecutiveParentReg = workRegById(tiedReg->consecutiveParent());
+             consecutiveParentReg->addImmediateConsecutive(allocator(), workId);
+           }
+         }
+
+         nInsts++;
+       }
+
+       if (node == stop)
+         break;
+
+       node = node->prev();
+       ASMJIT_ASSERT(node != nullptr);
+     }
+
+     nInstsPerBlock[block->blockId()] = nInsts;
+   }
+
+   // Calculate IN/OUT of Each Block
+   // ------------------------------
+
+   {
+     ZoneStack<RABlock*> workList;
+     ZoneBitVector workBits;
+
+     ASMJIT_PROPAGATE(workList.init(allocator()));
+     ASMJIT_PROPAGATE(workBits.resize(allocator(), blockCount(), true));
+
+     for (i = 0; i < numReachableBlocks; i++) {
+       RABlock* block = _pov[i];
+       LiveOps::recalcInOut(block, numBitWords, true);
+       ASMJIT_PROPAGATE(workList.append(block));
+     }
+
+     while (!workList.empty()) {
+       RABlock* block = workList.popFirst();
+       uint32_t blockId = block->blockId();
+
+       workBits.setBit(blockId, false);
+       if (LiveOps::recalcInOut(block, numBitWords)) {
+         const RABlocks& predecessors = block->predecessors();
+         uint32_t numPredecessors = predecessors.size();
+
+         for (uint32_t j = 0; j < numPredecessors; j++) {
+           RABlock* pred = predecessors[j];
+           if (!workBits.bitAt(pred->blockId())) {
+             workBits.setBit(pred->blockId(), true);
+             ASMJIT_PROPAGATE(workList.append(pred));
+           }
+         }
+       }
+       numVisits++;
+     }
+
+     workList.reset();
+     workBits.release(allocator());
+   }
+
+   ASMJIT_RA_LOG_COMPLEX({
+     logger->logf(" LiveIn/Out Done (%u visits)\n", numVisits);
+     for (i = 0; i < numAllBlocks; i++) {
+       RABlock* block = _blocks[i];
+
+       ASMJIT_PROPAGATE(sb.assignFormat(" {#%u}\n", block->blockId()));
+       ASMJIT_PROPAGATE(_dumpBlockLiveness(sb, block));
+
+       logger->log(sb);
+     }
+   });
+
+   // Reserve the space in each `RAWorkReg` for references
+   // ----------------------------------------------------
+
+   for (i = 0; i < numWorkRegs; i++) {
+     RAWorkReg* workReg = workRegById(i);
+     ASMJIT_PROPAGATE(workReg->_refs.reserve(allocator(), nUsesPerWorkReg[i]));
+     ASMJIT_PROPAGATE(workReg->_writes.reserve(allocator(), nOutsPerWorkReg[i]));
+   }
+
+   // Assign block and instruction positions, build LiveCount and LiveSpans
+   // ---------------------------------------------------------------------
+
+   uint32_t position = 2;
+   for (i = 0; i < numAllBlocks; i++) {
+     RABlock* block = _blocks[i];
+     if (!block->isReachable())
+       continue;
+
+     BaseNode* node = block->first();
+     BaseNode* stop = block->last();
+
+     uint32_t endPosition = position + nInstsPerBlock[i] * 2;
+     block->setFirstPosition(position);
+     block->setEndPosition(endPosition);
+
+     RALiveCount curLiveCount;
+     RALiveCount maxLiveCount;
+
+     // Process LIVE-IN.
+     ZoneBitVector::ForEachBitSet it(block->liveIn());
+     while (it.hasNext()) {
+       RAWorkReg* workReg = _workRegs[uint32_t(it.next())];
+       curLiveCount[workReg->group()]++;
+       ASMJIT_PROPAGATE(workReg->liveSpans().openAt(allocator(), position, endPosition));
+     }
+
+     for (;;) {
+       if (node->isInst()) {
+         InstNode* inst = node->as<InstNode>();
+         RAInst* raInst = inst->passData<RAInst>();
+         ASMJIT_ASSERT(raInst != nullptr);
+
+         RATiedReg* tiedRegs = raInst->tiedRegs();
+         uint32_t count = raInst->tiedCount();
+
+         inst->setPosition(position);
+         raInst->_liveCount = curLiveCount;
+
+         for (uint32_t j = 0; j < count; j++) {
+           RATiedReg* tiedReg = &tiedRegs[j];
+           uint32_t workId = tiedReg->workId();
+
+           // Create refs and writes.
+           RAWorkReg* workReg = workRegById(workId);
+           workReg->_refs.appendUnsafe(node);
+           if (tiedReg->isWrite())
+             workReg->_writes.appendUnsafe(node);
+
+           // We couldn't calculate this in previous steps, but since we know all LIVE-OUT at this point it becomes
+           // trivial. If this is the last instruction that uses this `workReg` and it's not LIVE-OUT then it is
+           // KILLed here.
+           if (tiedReg->isLast() && !block->liveOut().bitAt(workId))
+             tiedReg->addFlags(RATiedFlags::kKill);
+
+           LiveRegSpans& liveSpans = workReg->liveSpans();
+           bool wasOpen;
+           ASMJIT_PROPAGATE(liveSpans.openAt(allocator(), position + !tiedReg->isRead(), endPosition, wasOpen));
+
+           RegGroup group = workReg->group();
+           if (!wasOpen) {
+             curLiveCount[group]++;
+             raInst->_liveCount[group]++;
+           }
+
+           if (tiedReg->isKill()) {
+             liveSpans.closeAt(position + !tiedReg->isRead() + 1);
+             curLiveCount[group]--;
+           }
+
+           // Update `RAWorkReg::useIdMask` and `RAWorkReg::hintRegId`.
+           if (tiedReg->hasUseId()) {
+             uint32_t useId = tiedReg->useId();
+             workReg->addUseIdMask(Support::bitMask(useId));
+             if (!workReg->hasHintRegId() && !Support::bitTest(raInst->_clobberedRegs[group], useId))
+               workReg->setHintRegId(useId);
+           }
+
+           if (tiedReg->useRegMask()) {
+             workReg->restrictPreferredMask(tiedReg->useRegMask());
+             if (workReg->isLeadConsecutive())
+               workReg->restrictConsecutiveMask(tiedReg->useRegMask());
+           }
+
+           if (tiedReg->outRegMask()) {
+             workReg->restrictPreferredMask(tiedReg->outRegMask());
+             if (workReg->isLeadConsecutive())
+               workReg->restrictConsecutiveMask(tiedReg->outRegMask());
+           }
+
+           // Update `RAWorkReg::clobberedSurvivalMask`.
+           if (raInst->_clobberedRegs[group] && !tiedReg->isOutOrKill()) {
+             workReg->addClobberSurvivalMask(raInst->_clobberedRegs[group]);
+           }
+         }
+
+         position += 2;
+         maxLiveCount.op<Support::Max>(raInst->_liveCount);
+       }
+
+       if (node == stop)
+         break;
+
+       node = node->next();
+       ASMJIT_ASSERT(node != nullptr);
+     }
+
+     block->_maxLiveCount = maxLiveCount;
+     _globalMaxLiveCount.op<Support::Max>(maxLiveCount);
+     ASMJIT_ASSERT(position == block->endPosition());
+   }
+
+   // Calculate WorkReg statistics
+   // ----------------------------
+
+   for (i = 0; i < numWorkRegs; i++) {
+     RAWorkReg* workReg = _workRegs[i];
+
+     LiveRegSpans& spans = workReg->liveSpans();
+     uint32_t width = spans.width();
+     float freq = width ? float(double(workReg->_refs.size()) / double(width)) : float(0);
+
+     RALiveStats& stats = workReg->liveStats();
+     stats._width = width;
+     stats._freq = freq;
+     stats._priority = freq + float(int(workReg->virtReg()->weight())) * 0.01f;
+   }
+
+   ASMJIT_RA_LOG_COMPLEX({
+     sb.clear();
+     _dumpLiveSpans(sb);
+     logger->log(sb);
+   });
+
+   nUsesPerWorkReg.release(allocator());
+   nOutsPerWorkReg.release(allocator());
+   nInstsPerBlock.release(allocator());
+
+   return kErrorOk;
+ }
+
+ Error BaseRAPass::assignArgIndexToWorkRegs() noexcept {
+   ZoneBitVector& liveIn = entryBlock()->liveIn();
+   uint32_t argCount = func()->argCount();
+
+   for (uint32_t argIndex = 0; argIndex < argCount; argIndex++) {
+     for (uint32_t valueIndex = 0; valueIndex < Globals::kMaxValuePack; valueIndex++) {
+       // Unassigned argument.
+       const RegOnly& regArg = func()->argPack(argIndex)[valueIndex];
+       if (!regArg.isReg() || !cc()->isVirtIdValid(regArg.id()))
+         continue;
+
+       VirtReg* virtReg = cc()->virtRegById(regArg.id());
+       if (!virtReg)
+         continue;
+
+       // Unreferenced argument.
+       RAWorkReg* workReg = virtReg->workReg();
+       if (!workReg)
+         continue;
+
+       // Overwritten argument.
+       uint32_t workId = workReg->workId();
+       if (!liveIn.bitAt(workId))
+         continue;
+
+       workReg->setArgIndex(argIndex, valueIndex);
+       const FuncValue& arg = func()->detail().arg(argIndex, valueIndex);
+
+       if (arg.isReg() && _archTraits->regTypeToGroup(arg.regType()) == workReg->group()) {
+         workReg->setHintRegId(arg.regId());
+       }
+     }
+   }
+
+   return kErrorOk;
+ }
1110
+
1111
+ // BaseRAPass - Allocation - Global
1112
+ // ================================
1113
+
1114
+ #ifndef ASMJIT_NO_LOGGING
1115
+ static void RAPass_dumpSpans(String& sb, uint32_t index, const LiveRegSpans& liveSpans) noexcept {
1116
+ sb.appendFormat(" %02u: ", index);
1117
+
1118
+ for (uint32_t i = 0; i < liveSpans.size(); i++) {
1119
+ const LiveRegSpan& liveSpan = liveSpans[i];
1120
+ if (i) sb.append(", ");
1121
+ sb.appendFormat("[%u:%u@%u]", liveSpan.a, liveSpan.b, liveSpan.id);
1122
+ }
1123
+
1124
+ sb.append('\n');
1125
+ }
1126
+ #endif
1127
+
1128
+ Error BaseRAPass::runGlobalAllocator() noexcept {
1129
+ ASMJIT_PROPAGATE(initGlobalLiveSpans());
1130
+
1131
+ for (RegGroup group : RegGroupVirtValues{}) {
1132
+ ASMJIT_PROPAGATE(binPack(group));
1133
+ }
1134
+
1135
+ return kErrorOk;
1136
+ }
1137
+
1138
+ ASMJIT_FAVOR_SPEED Error BaseRAPass::initGlobalLiveSpans() noexcept {
1139
+ for (RegGroup group : RegGroupVirtValues{}) {
1140
+ size_t physCount = _physRegCount[group];
1141
+ LiveRegSpans* liveSpans = nullptr;
1142
+
1143
+ if (physCount) {
1144
+ liveSpans = allocator()->allocT<LiveRegSpans>(physCount * sizeof(LiveRegSpans));
1145
+ if (ASMJIT_UNLIKELY(!liveSpans))
1146
+ return DebugUtils::errored(kErrorOutOfMemory);
1147
+
1148
+ for (size_t physId = 0; physId < physCount; physId++)
1149
+ new(&liveSpans[physId]) LiveRegSpans();
1150
+ }
1151
+
1152
+ _globalLiveSpans[group] = liveSpans;
1153
+ }
1154
+
1155
+ return kErrorOk;
1156
+ }
1157
+
1158
+ struct RAConsecutiveReg {
1159
+ RAWorkReg* workReg;
1160
+ RAWorkReg* parentReg;
1161
+ };
1162
+
1163
+ ASMJIT_FAVOR_SPEED Error BaseRAPass::binPack(RegGroup group) noexcept {
1164
+ if (workRegCount(group) == 0)
1165
+ return kErrorOk;
1166
+
1167
+ #ifndef ASMJIT_NO_LOGGING
1168
+ Logger* logger = getLoggerIf(DiagnosticOptions::kRADebugAssignment);
1169
+ StringTmp<512> sb;
1170
+
1171
+ ASMJIT_RA_LOG_FORMAT("[BinPack] Available=%u (0x%08X) Count=%u RegGroup=%u\n",
1172
+ Support::popcnt(_availableRegs[group]),
1173
+ _availableRegs[group],
1174
+ workRegCount(group),
1175
+ uint32_t(group));
1176
+ #endif
1177
+
1178
+ uint32_t i;
1179
+ uint32_t physCount = _physRegCount[group];
1180
+
1181
+ RAWorkRegs workRegs;
1182
+ ZoneVector<RAConsecutiveReg> consecutiveRegs;
1183
+ LiveRegSpans tmpSpans;
1184
+
1185
+ ASMJIT_PROPAGATE(workRegs.concat(allocator(), this->workRegs(group)));
1186
+ workRegs.sort([](const RAWorkReg* a, const RAWorkReg* b) noexcept {
1187
+ return b->liveStats().priority() - a->liveStats().priority();
1188
+ });
1189
+
1190
+ uint32_t numWorkRegs = workRegs.size();
1191
+ RegMask availableRegs = _availableRegs[group];
1192
+
1193
+ // First try to pack everything that provides register-id hint as these are most likely function arguments and fixed
1194
+ // (precolored) virtual registers.
1195
+ if (!workRegs.empty()) {
1196
+ uint32_t dstIndex = 0;
1197
+
1198
+ for (i = 0; i < numWorkRegs; i++) {
1199
+ RAWorkReg* workReg = workRegs[i];
1200
+
1201
+ if (workReg->isLeadConsecutive()) {
1202
+ ASMJIT_PROPAGATE(consecutiveRegs.append(allocator(), RAConsecutiveReg{workReg, nullptr}));
1203
+ workReg->markProcessedConsecutive();
1204
+ }
1205
+
1206
+ if (workReg->hasHintRegId()) {
1207
+ uint32_t physId = workReg->hintRegId();
1208
+ if (Support::bitTest(availableRegs, physId)) {
1209
+ LiveRegSpans& live = _globalLiveSpans[group][physId];
1210
+ Error err = tmpSpans.nonOverlappingUnionOf(allocator(), live, workReg->liveSpans(), LiveRegData(workReg->virtId()));
1211
+
1212
+ if (err == kErrorOk) {
1213
+ live.swap(tmpSpans);
1214
+ workReg->setHomeRegId(physId);
1215
+ workReg->markAllocated();
1216
+ continue;
1217
+ }
1218
+
1219
+ if (err != 0xFFFFFFFFu)
1220
+ return err;
1221
+ }
1222
+ }
1223
+
1224
+ workRegs[dstIndex++] = workReg;
1225
+ }
1226
+
1227
+ workRegs._setSize(dstIndex);
1228
+ numWorkRegs = dstIndex;
1229
+ }
1230
+
1231
+ // Allocate consecutive registers - both leads and all consecutives. This is important and prioritized over the rest,
1232
+ // because once a lead is allocated we really need to allocate its consecutives, otherwise we may bin pack other
1233
+ // registers into their places, which would result in wrong hints to the local allocator, and then into many moves
1234
+ // or spills.
1235
+ if (!consecutiveRegs.empty()) {
1236
+ // This loop appends all other consecutive registers into `consecutiveRegs` array. Leads are at the beginning,
1237
+ // non-leads follow.
1238
+ i = 0;
1239
+ for (;;) {
1240
+ uint32_t stop = consecutiveRegs.size();
1241
+ if (i == stop)
1242
+ break;
1243
+
1244
+ while (i < stop) {
1245
+ RAWorkReg* workReg = consecutiveRegs[i].workReg;
1246
+ if (workReg->hasImmediateConsecutives()) {
1247
+ ZoneBitVector::ForEachBitSet it(workReg->immediateConsecutives());
1248
+ while (it.hasNext()) {
1249
+ uint32_t consecutiveWorkId = uint32_t(it.next());
1250
+ RAWorkReg* consecutiveReg = workRegById(consecutiveWorkId);
1251
+ if (!consecutiveReg->isProcessedConsecutive()) {
1252
+ ASMJIT_PROPAGATE(consecutiveRegs.append(allocator(), RAConsecutiveReg{consecutiveReg, workReg}));
1253
+ consecutiveReg->markProcessedConsecutive();
1254
+ }
1255
+ }
1256
+ }
1257
+ i++;
1258
+ }
1259
+ }
1260
+
1261
+ uint32_t numConsecutiveRegs = consecutiveRegs.size();
1262
+ for (i = 0; i < numConsecutiveRegs; i++) {
1263
+ RAWorkReg* workReg = consecutiveRegs[i].workReg;
1264
+ if (workReg->isAllocated())
1265
+ continue;
1266
+
1267
+ RAWorkReg* parentReg = consecutiveRegs[i].parentReg;
1268
+ RegMask physRegs = 0;
1269
+
1270
+ if (!parentReg) {
1271
+ physRegs = availableRegs & workReg->preferredMask();
1272
+ if (!physRegs) {
1273
+ physRegs = availableRegs & workReg->consecutiveMask();
1274
+
1275
+ // NOTE: This should never be true as it would mean we would never allocate this virtual register
1276
+ // (not here, and not later when local register allocator processes RATiedReg sets).
1277
+ if (ASMJIT_UNLIKELY(!physRegs))
1278
+ return DebugUtils::errored(kErrorConsecutiveRegsAllocation);
1279
+ }
1280
+ }
1281
+ else if (parentReg->hasHomeRegId()) {
1282
+ uint32_t consecutiveId = parentReg->homeRegId() + 1;
1283
+
1284
+ // NOTE: We don't support wrapping. If this goes beyond all allocable registers there is something wrong.
1285
+ if (consecutiveId > 31 || !Support::bitTest(availableRegs, consecutiveId))
1286
+ return DebugUtils::errored(kErrorConsecutiveRegsAllocation);
1287
+
1288
+ workReg->setHintRegId(consecutiveId);
1289
+ physRegs = Support::bitMask(consecutiveId);
1290
+ }
1291
+
1292
+ while (physRegs) {
1293
+ uint32_t physId = Support::bitSizeOf<RegMask>() - 1 - Support::clz(physRegs);
1294
+
1295
+ LiveRegSpans& live = _globalLiveSpans[group][physId];
1296
+ Error err = tmpSpans.nonOverlappingUnionOf(allocator(), live, workReg->liveSpans(), LiveRegData(workReg->virtId()));
1297
+
1298
+ if (err == kErrorOk) {
1299
+ workReg->setHomeRegId(physId);
1300
+ workReg->markAllocated();
1301
+ live.swap(tmpSpans);
1302
+ break;
1303
+ }
1304
+
1305
+ if (ASMJIT_UNLIKELY(err != 0xFFFFFFFFu))
1306
+ return err;
1307
+
1308
+ physRegs ^= Support::bitMask(physId);
1309
+ }
1310
+ }
1311
+ }
+
+   // Try to pack the rest.
+   if (!workRegs.empty()) {
+     uint32_t dstIndex = 0;
+
+     for (i = 0; i < numWorkRegs; i++) {
+       RAWorkReg* workReg = workRegs[i];
+
+       if (workReg->isAllocated())
+         continue;
+
+       RegMask physRegs = availableRegs;
+       if (physRegs & workReg->preferredMask())
+         physRegs &= workReg->preferredMask();
+
+       while (physRegs) {
+         RegMask preferredMask = physRegs;
+         uint32_t physId = Support::ctz(preferredMask);
+
+         if (workReg->clobberSurvivalMask()) {
+           preferredMask &= workReg->clobberSurvivalMask();
+           if (preferredMask)
+             physId = Support::ctz(preferredMask);
+         }
+
+         LiveRegSpans& live = _globalLiveSpans[group][physId];
+         Error err = tmpSpans.nonOverlappingUnionOf(allocator(), live, workReg->liveSpans(), LiveRegData(workReg->virtId()));
+
+         if (err == kErrorOk) {
+           workReg->setHomeRegId(physId);
+           workReg->markAllocated();
+           live.swap(tmpSpans);
+           break;
+         }
+
+         if (ASMJIT_UNLIKELY(err != 0xFFFFFFFFu))
+           return err;
+
+         physRegs ^= Support::bitMask(physId);
+       }
+
+       // Keep it in `workRegs` if it was not allocated.
+       if (!physRegs)
+         workRegs[dstIndex++] = workReg;
+     }
+
+     workRegs._setSize(dstIndex);
+     numWorkRegs = dstIndex;
+   }
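Both packing loops hinge on `nonOverlappingUnionOf()`: a work register fits into a physical register only if its live spans can be merged into the spans already assigned there without any overlap. A simplified model of that test over sorted half-open intervals (an illustration of the idea, not asmjit's implementation):

    #include <cstdint>
    #include <vector>

    struct Span { uint32_t a, b; }; // half-open live interval [a, b).

    // Merges two sorted span lists into `out`; returns false on overlap,
    // which corresponds to the special 0xFFFFFFFF result handled above.
    static bool nonOverlappingUnion(const std::vector<Span>& x,
                                    const std::vector<Span>& y,
                                    std::vector<Span>& out) {
      size_t i = 0, j = 0;
      uint32_t lastEnd = 0;
      while (i < x.size() || j < y.size()) {
        bool takeX = j == y.size() || (i < x.size() && x[i].a <= y[j].a);
        const Span& s = takeX ? x[i++] : y[j++];
        if (s.a < lastEnd)
          return false; // the two registers are live at the same time.
        out.push_back(s);
        lastEnd = s.b;
      }
      return true;
    }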
+
+   ASMJIT_RA_LOG_COMPLEX({
+     for (uint32_t physId = 0; physId < physCount; physId++) {
+       LiveRegSpans& live = _globalLiveSpans[group][physId];
+       if (live.empty())
+         continue;
+
+       sb.clear();
+       RAPass_dumpSpans(sb, physId, live);
+       logger->log(sb);
+     }
+   });
+
+   // Maybe unused if logging is disabled.
+   DebugUtils::unused(physCount);
+
+   if (workRegs.empty()) {
+     ASMJIT_RA_LOG_FORMAT("  Completed.\n");
+   }
+   else {
+     _strategy[group].setType(RAStrategyType::kComplex);
+     for (RAWorkReg* workReg : workRegs)
+       workReg->markStackPreferred();
+
+     ASMJIT_RA_LOG_COMPLEX({
+       uint32_t count = workRegs.size();
+       sb.clear();
+       sb.appendFormat("  Unassigned (%u): ", count);
+       for (i = 0; i < numWorkRegs; i++) {
+         RAWorkReg* workReg = workRegs[i];
+         if (i) sb.append(", ");
+         sb.append(workReg->name());
+       }
+       sb.append('\n');
+       logger->log(sb);
+     });
+   }
+
+   return kErrorOk;
+ }
+
+ // BaseRAPass - Allocation - Local
+ // ===============================
+
+ Error BaseRAPass::runLocalAllocator() noexcept {
+   RALocalAllocator lra(this);
+   ASMJIT_PROPAGATE(lra.init());
+
+   if (!blockCount())
+     return kErrorOk;
+
+   // The allocation is done when this reaches zero.
+   uint32_t blocksRemaining = reachableBlockCount();
+
+   // Current block.
+   uint32_t blockId = 0;
+   RABlock* block = _blocks[blockId];
+
+   // The first block (entry) must always be reachable.
+   ASMJIT_ASSERT(block->isReachable());
+
+   // Assign function arguments for the initial block. The `lra` is valid now.
+   lra.makeInitialAssignment();
+   ASMJIT_PROPAGATE(setBlockEntryAssignment(block, block, lra._curAssignment));
+
+   // The loop starts at the first block and iterates blocks in order; however, the algorithm may also jump to any
+   // other block once it finishes the current one, if that block is a jump target. In-order iteration just makes
+   // sure that all blocks are visited.
+   for (;;) {
+     BaseNode* first = block->first();
+     BaseNode* last = block->last();
+     BaseNode* terminator = block->hasTerminator() ? last : nullptr;
+
+     BaseNode* beforeFirst = first->prev();
+     BaseNode* afterLast = last->next();
+
+     bool unconditionalJump = false;
+     RABlock* consecutive = nullptr;
+
+     if (block->hasSuccessors())
+       consecutive = block->successors()[0];
+
+     lra.setBlock(block);
+     block->makeAllocated();
+
+     BaseNode* node = first;
+     while (node != afterLast) {
+       BaseNode* next = node->next();
+       if (node->isInst()) {
+         InstNode* inst = node->as<InstNode>();
+
+         if (ASMJIT_UNLIKELY(inst == terminator)) {
+           const RABlocks& successors = block->successors();
+           if (block->hasConsecutive()) {
+             ASMJIT_PROPAGATE(lra.allocBranch(inst, successors.last(), successors.first()));
+
+             node = next;
+             continue;
+           }
+           else if (successors.size() > 1) {
+             RABlock* cont = block->hasConsecutive() ? successors.first() : nullptr;
+             ASMJIT_PROPAGATE(lra.allocJumpTable(inst, successors, cont));
+
+             node = next;
+             continue;
+           }
+           else {
+             // Otherwise this is an unconditional jump; special handling isn't required.
+             unconditionalJump = true;
+           }
+         }
+
+         ASMJIT_PROPAGATE(lra.allocInst(inst));
+         if (inst->type() == NodeType::kInvoke)
+           ASMJIT_PROPAGATE(emitPreCall(inst->as<InvokeNode>()));
+         else
+           ASMJIT_PROPAGATE(lra.spillAfterAllocation(inst));
+       }
+       node = next;
+     }
+
+     if (consecutive) {
+       BaseNode* prev = afterLast ? afterLast->prev() : cc()->lastNode();
+       cc()->_setCursor(unconditionalJump ? prev->prev() : prev);
+
+       if (consecutive->hasEntryAssignment()) {
+         ASMJIT_PROPAGATE(lra.switchToAssignment(consecutive->entryPhysToWorkMap(), consecutive->liveIn(), consecutive->isAllocated(), false));
+       }
+       else {
+         ASMJIT_PROPAGATE(lra.spillRegsBeforeEntry(consecutive));
+         ASMJIT_PROPAGATE(setBlockEntryAssignment(consecutive, block, lra._curAssignment));
+         lra._curAssignment.copyFrom(consecutive->entryPhysToWorkMap());
+       }
+     }
+
+     // Important, as the local allocator can insert instructions before and after any instruction within the
+     // basic block.
+     block->setFirst(beforeFirst->next());
+     block->setLast(afterLast ? afterLast->prev() : cc()->lastNode());
+
+     if (--blocksRemaining == 0)
+       break;
+
+     // Switch to the next consecutive block, if any.
+     if (consecutive) {
+       block = consecutive;
+       if (!block->isAllocated())
+         continue;
+     }
+
+     // Get the next block.
+     for (;;) {
+       if (++blockId >= blockCount())
+         blockId = 0;
+
+       block = _blocks[blockId];
+       if (!block->isReachable() || block->isAllocated() || !block->hasEntryAssignment())
+         continue;
+
+       break;
+     }
+
+     // If we switched to some block we have to update the local allocator.
+     lra.replaceAssignment(block->entryPhysToWorkMap());
+   }
+
+   _clobberedRegs.op<Support::Or>(lra._clobberedRegs);
+   return kErrorOk;
+ }
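When a block has no unallocated consecutive successor, the loop above falls back to a circular scan of the block array, looking for a reachable, not-yet-allocated block that already has an entry assignment. The scan in isolation, over a hypothetical simplified `Block` type (illustration only):

    #include <cstdint>
    #include <vector>

    struct Block {
      bool reachable = false;
      bool allocated = false;
      bool hasEntryAssignment = false;
    };

    // Circular scan starting after `blockId`; assumes at least one candidate
    // exists, which the allocator guarantees while blocksRemaining > 0.
    static uint32_t nextBlockToAllocate(const std::vector<Block>& blocks, uint32_t blockId) {
      for (;;) {
        if (++blockId >= blocks.size())
          blockId = 0;
        const Block& b = blocks[blockId];
        if (b.reachable && !b.allocated && b.hasEntryAssignment)
          return blockId;
      }
    }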
+
+ Error BaseRAPass::setBlockEntryAssignment(RABlock* block, const RABlock* fromBlock, const RAAssignment& fromAssignment) noexcept {
+   if (block->hasSharedAssignmentId()) {
+     uint32_t sharedAssignmentId = block->sharedAssignmentId();
+
+     // Shouldn't happen. Entry assignment of a block that has a shared state will assign to all blocks with the
+     // same sharedAssignmentId. It's a bug if the shared state has already been assigned.
+     if (!_sharedAssignments[sharedAssignmentId].empty())
+       return DebugUtils::errored(kErrorInvalidState);
+
+     return setSharedAssignment(sharedAssignmentId, fromAssignment);
+   }
+
+   PhysToWorkMap* physToWorkMap = clonePhysToWorkMap(fromAssignment.physToWorkMap());
+   if (ASMJIT_UNLIKELY(!physToWorkMap))
+     return DebugUtils::errored(kErrorOutOfMemory);
+
+   block->setEntryAssignment(physToWorkMap);
+
+   // If this is the first (entry) block there is nothing else to do.
+   if (block == fromBlock) {
+     // The entry block should never have a shared state.
+     if (block->hasSharedAssignmentId())
+       return DebugUtils::errored(kErrorInvalidState);
+
+     return kErrorOk;
+   }
+
+   const ZoneBitVector& liveOut = fromBlock->liveOut();
+   const ZoneBitVector& liveIn = block->liveIn();
+
+   // It's possible that `fromBlock` has LIVE-OUT regs that `block` doesn't have in LIVE-IN; these have to be
+   // unassigned.
+   {
+     ZoneBitVector::ForEachBitOp<Support::AndNot> it(liveOut, liveIn);
+     while (it.hasNext()) {
+       uint32_t workId = uint32_t(it.next());
+       RAWorkReg* workReg = workRegById(workId);
+
+       RegGroup group = workReg->group();
+       uint32_t physId = fromAssignment.workToPhysId(group, workId);
+
+       if (physId != RAAssignment::kPhysNone)
+         physToWorkMap->unassign(group, physId, _physRegIndex.get(group) + physId);
+     }
+   }
+
+   return blockEntryAssigned(physToWorkMap);
+ }
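The `ForEachBitOp<Support::AndNot>` iterator visits exactly the set `liveOut & ~liveIn`, i.e. registers that leave `fromBlock` live but are dead on entry to `block`. Over plain 64-bit words the same iteration looks like this (a sketch using GCC/Clang's `__builtin_ctzll`, not asmjit's ZoneBitVector):

    #include <cstddef>
    #include <cstdint>

    // Calls visit(workId) for every bit set in liveOut but clear in liveIn.
    template<typename Visitor>
    static void forEachAndNot(const uint64_t* liveOut, const uint64_t* liveIn,
                              size_t numWords, Visitor&& visit) {
      for (size_t w = 0; w < numWords; w++) {
        uint64_t bits = liveOut[w] & ~liveIn[w];
        while (bits) {
          unsigned bit = unsigned(__builtin_ctzll(bits)); // lowest set bit.
          visit(uint32_t(w * 64 + bit));
          bits &= bits - 1; // clear it and continue.
        }
      }
    }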
+
+ Error BaseRAPass::setSharedAssignment(uint32_t sharedAssignmentId, const RAAssignment& fromAssignment) noexcept {
+   ASMJIT_ASSERT(_sharedAssignments[sharedAssignmentId].empty());
+
+   PhysToWorkMap* physToWorkMap = clonePhysToWorkMap(fromAssignment.physToWorkMap());
+   if (ASMJIT_UNLIKELY(!physToWorkMap))
+     return DebugUtils::errored(kErrorOutOfMemory);
+
+   _sharedAssignments[sharedAssignmentId].assignPhysToWorkMap(physToWorkMap);
+
+   ZoneBitVector& sharedLiveIn = _sharedAssignments[sharedAssignmentId]._liveIn;
+   ASMJIT_PROPAGATE(sharedLiveIn.resize(allocator(), workRegCount()));
+
+   Support::Array<uint32_t, Globals::kNumVirtGroups> sharedAssigned {};
+   for (RABlock* block : blocks()) {
+     if (block->sharedAssignmentId() == sharedAssignmentId) {
+       ASMJIT_ASSERT(!block->hasEntryAssignment());
+
+       PhysToWorkMap* entryPhysToWorkMap = clonePhysToWorkMap(fromAssignment.physToWorkMap());
+       if (ASMJIT_UNLIKELY(!entryPhysToWorkMap))
+         return DebugUtils::errored(kErrorOutOfMemory);
+
+       block->setEntryAssignment(entryPhysToWorkMap);
+
+       const ZoneBitVector& liveIn = block->liveIn();
+       sharedLiveIn.or_(liveIn);
+
+       for (RegGroup group : RegGroupVirtValues{}) {
+         sharedAssigned[group] |= entryPhysToWorkMap->assigned[group];
+
+         uint32_t physBaseIndex = _physRegIndex.get(group);
+         Support::BitWordIterator<RegMask> it(entryPhysToWorkMap->assigned[group]);
+
+         while (it.hasNext()) {
+           uint32_t physId = it.next();
+           uint32_t workId = entryPhysToWorkMap->workIds[physBaseIndex + physId];
+
+           if (!liveIn.bitAt(workId))
+             entryPhysToWorkMap->unassign(group, physId, physBaseIndex + physId);
+         }
+       }
+     }
+   }
+
+   for (RegGroup group : RegGroupVirtValues{}) {
+     uint32_t physBaseIndex = _physRegIndex.get(group);
+     Support::BitWordIterator<RegMask> it(_availableRegs[group] & ~sharedAssigned[group]);
+
+     while (it.hasNext()) {
+       uint32_t physId = it.next();
+       if (Support::bitTest(physToWorkMap->assigned[group], physId))
+         physToWorkMap->unassign(group, physId, physBaseIndex + physId);
+     }
+   }
+
+   return blockEntryAssigned(physToWorkMap);
+ }
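A shared entry state keeps only what every participating block can use: block live-in sets are OR-ed into `sharedLiveIn`, while each block's own map drops assignments whose work register is not live into it. A toy version of that pruning step, with a 64-bit live set standing in for ZoneBitVector and `__builtin_ctzll` for the bit iterator (illustrative assumptions only):

    #include <cstdint>
    #include <vector>

    struct EntryState {
      uint64_t assigned = 0;          // occupancy mask, one bit per phys reg.
      std::vector<uint32_t> workIdOf; // workIdOf[physId]; work ids < 64 here.
    };

    // Unassigns physical registers whose work register is not live-in.
    static void pruneToLiveIn(EntryState& state, uint64_t liveIn) {
      uint64_t bits = state.assigned;
      while (bits) {
        unsigned physId = unsigned(__builtin_ctzll(bits));
        bits &= bits - 1;
        if (!(liveIn & (uint64_t(1) << state.workIdOf[physId])))
          state.assigned &= ~(uint64_t(1) << physId);
      }
    }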
+
+ Error BaseRAPass::blockEntryAssigned(const PhysToWorkMap* physToWorkMap) noexcept {
+   // The complex allocation strategy requires recording register assignments upon block entry (or per shared state).
+   for (RegGroup group : RegGroupVirtValues{}) {
+     if (!_strategy[group].isComplex())
+       continue;
+
+     uint32_t physBaseIndex = _physRegIndex[group];
+     Support::BitWordIterator<RegMask> it(physToWorkMap->assigned[group]);
+
+     while (it.hasNext()) {
+       uint32_t physId = it.next();
+       uint32_t workId = physToWorkMap->workIds[physBaseIndex + physId];
+
+       RAWorkReg* workReg = workRegById(workId);
+       workReg->addAllocatedMask(Support::bitMask(physId));
+     }
+   }
+
+   return kErrorOk;
+ }
+
+ // BaseRAPass - Allocation - Utilities
+ // ===================================
+
+ Error BaseRAPass::useTemporaryMem(BaseMem& out, uint32_t size, uint32_t alignment) noexcept {
+   ASMJIT_ASSERT(alignment <= 64);
+
+   if (_temporaryMem.isNone()) {
+     ASMJIT_PROPAGATE(cc()->_newStack(&_temporaryMem.as<BaseMem>(), size, alignment));
+   }
+   else {
+     ASMJIT_ASSERT(_temporaryMem.as<BaseMem>().isRegHome());
+
+     uint32_t virtId = _temporaryMem.as<BaseMem>().baseId();
+     VirtReg* virtReg = cc()->virtRegById(virtId);
+
+     cc()->setStackSize(virtId, Support::max(virtReg->virtSize(), size),
+                                Support::max(virtReg->alignment(), alignment));
+   }
+
+   out = _temporaryMem.as<BaseMem>();
+   return kErrorOk;
+ }
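`useTemporaryMem()` is the classic single-scratch-slot pattern: rather than allocating a new stack slot per request, one slot is grown to the maximum size and alignment ever asked for, so it remains valid for every caller. The idea in isolation (hypothetical names, a sketch rather than asmjit's API):

    #include <algorithm>
    #include <cstdint>

    struct ScratchSlot {
      uint32_t size = 0;
      uint32_t alignment = 1;

      // Keep the running maximum so the one slot satisfies all requests.
      void request(uint32_t s, uint32_t a) {
        size = std::max(size, s);
        alignment = std::max(alignment, a);
      }
    };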
+
+ // BaseRAPass - Allocation - Prolog & Epilog
+ // =========================================
+
+ Error BaseRAPass::updateStackFrame() noexcept {
+   // Update the StackFrame information we collected during allocation. The only information we don't have at the
+   // moment is the final local stack size, which is calculated last.
+   FuncFrame& frame = func()->frame();
+   for (RegGroup group : RegGroupVirtValues{})
+     frame.addDirtyRegs(group, _clobberedRegs[group]);
+   frame.setLocalStackAlignment(_stackAllocator.alignment());
+
+   // If there are stack arguments that are not assigned to registers upon entry and the function doesn't require
+   // dynamic stack alignment, we keep these arguments where they are. This also marks all stack slots that match
+   // these arguments as allocated.
+   if (_numStackArgsToStackSlots)
+     ASMJIT_PROPAGATE(_markStackArgsToKeep());
+
+   // Calculate offsets of all stack slots and update StackSize to reflect the calculated local stack size.
+   ASMJIT_PROPAGATE(_stackAllocator.calculateStackFrame());
+   frame.setLocalStackSize(_stackAllocator.stackSize());
+
+   // Update the stack frame based on `_argsAssignment` and finalize it. Finalization means to apply the final
+   // calculation to the stack layout.
+   ASMJIT_PROPAGATE(_argsAssignment.updateFuncFrame(frame));
+   ASMJIT_PROPAGATE(frame.finalize());
+
+   // StackAllocator allocates all slots starting from [0], so adjust them when necessary.
+   if (frame.localStackOffset() != 0)
+     ASMJIT_PROPAGATE(_stackAllocator.adjustSlotOffsets(int32_t(frame.localStackOffset())));
+
+   // Again, if there are stack arguments allocated in the function's stack we have to handle them. This handles all
+   // cases (either regular or dynamic stack alignment).
+   if (_numStackArgsToStackSlots)
+     ASMJIT_PROPAGATE(_updateStackArgs());
+
+   return kErrorOk;
+ }
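Because the stack allocator lays slots out from offset zero, rebasing after `frame.finalize()` is a single pass that adds the final local stack offset to every slot. A sketch of that adjustment over a simplified slot type (not the RAStackAllocator API):

    #include <cstdint>
    #include <vector>

    struct Slot { int32_t offset; };

    // Mirrors the adjustSlotOffsets() call above: shift every slot by the
    // frame's local stack offset once the layout is final.
    static void adjustSlotOffsets(std::vector<Slot>& slots, int32_t localStackOffset) {
      for (Slot& slot : slots)
        slot.offset += localStackOffset;
    }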
+
+ Error BaseRAPass::_markStackArgsToKeep() noexcept {
+   FuncFrame& frame = func()->frame();
+   bool hasSAReg = frame.hasPreservedFP() || !frame.hasDynamicAlignment();
+
+   RAWorkRegs& workRegs = _workRegs;
+   uint32_t numWorkRegs = workRegCount();
+
+   for (uint32_t workId = 0; workId < numWorkRegs; workId++) {
+     RAWorkReg* workReg = workRegs[workId];
+     if (workReg->hasFlag(RAWorkRegFlags::kStackArgToStack)) {
+       ASMJIT_ASSERT(workReg->hasArgIndex());
+       const FuncValue& srcArg = _func->detail().arg(workReg->argIndex());
+
+       // If the register doesn't have a stack slot then we failed. This doesn't make much sense as it was marked
+       // as `kFlagStackArgToStack`, which requires that the WorkReg be live-in upon function entry.
+       RAStackSlot* slot = workReg->stackSlot();
+       if (ASMJIT_UNLIKELY(!slot))
+         return DebugUtils::errored(kErrorInvalidState);
+
+       if (hasSAReg && srcArg.isStack() && !srcArg.isIndirect()) {
+         uint32_t typeSize = TypeUtils::sizeOf(srcArg.typeId());
+         if (typeSize == slot->size()) {
+           slot->addFlags(RAStackSlot::kFlagStackArg);
+           continue;
+         }
+       }
+
+       // NOTE: Update the StackOffset here so that when `_argsAssignment.updateFuncFrame()` is called it takes
+       // moves to stack slots into consideration. Without this we could miss some scratch registers later.
+       FuncValue& dstArg = _argsAssignment.arg(workReg->argIndex(), workReg->argValueIndex());
+       dstArg.assignStackOffset(0);
+     }
+   }
+
+   return kErrorOk;
+ }
+
+ Error BaseRAPass::_updateStackArgs() noexcept {
+   FuncFrame& frame = func()->frame();
+   RAWorkRegs& workRegs = _workRegs;
+   uint32_t numWorkRegs = workRegCount();
+
+   for (uint32_t workId = 0; workId < numWorkRegs; workId++) {
+     RAWorkReg* workReg = workRegs[workId];
+     if (workReg->hasFlag(RAWorkRegFlags::kStackArgToStack)) {
+       ASMJIT_ASSERT(workReg->hasArgIndex());
+       RAStackSlot* slot = workReg->stackSlot();
+
+       if (ASMJIT_UNLIKELY(!slot))
+         return DebugUtils::errored(kErrorInvalidState);
+
+       if (slot->isStackArg()) {
+         const FuncValue& srcArg = _func->detail().arg(workReg->argIndex());
+         if (frame.hasPreservedFP()) {
+           slot->setBaseRegId(_fp.id());
+           slot->setOffset(int32_t(frame.saOffsetFromSA()) + srcArg.stackOffset());
+         }
+         else {
+           slot->setOffset(int32_t(frame.saOffsetFromSP()) + srcArg.stackOffset());
+         }
+       }
+       else {
+         FuncValue& dstArg = _argsAssignment.arg(workReg->argIndex(), workReg->argValueIndex());
+         dstArg.setStackOffset(slot->offset());
+       }
+     }
+   }
+
+   return kErrorOk;
+ }
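The slot update above encodes one decision: address incoming stack arguments relative to the frame pointer when it is preserved, otherwise relative to the stack pointer, each with its own correction. The decision in isolation, over hypothetical frame fields (illustration only):

    #include <cstdint>

    struct FrameInfo {
      bool preservedFP;
      uint32_t fpRegId, spRegId;
      int32_t saOffsetFromSA; // incoming-args offset relative to FP.
      int32_t saOffsetFromSP; // incoming-args offset relative to SP.
    };

    struct SlotAddr { uint32_t baseRegId; int32_t offset; };

    // FP-relative when FP is preserved, SP-relative otherwise; the
    // argument's own stack offset is added in both cases, as above.
    static SlotAddr stackArgAddress(const FrameInfo& f, int32_t argStackOffset) {
      if (f.preservedFP)
        return { f.fpRegId, f.saOffsetFromSA + argStackOffset };
      return { f.spRegId, f.saOffsetFromSP + argStackOffset };
    }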
+
+ Error BaseRAPass::insertPrologEpilog() noexcept {
+   FuncFrame& frame = _func->frame();
+
+   cc()->_setCursor(func());
+   ASMJIT_PROPAGATE(cc()->emitProlog(frame));
+   ASMJIT_PROPAGATE(_iEmitHelper->emitArgsAssignment(frame, _argsAssignment));
+
+   cc()->_setCursor(func()->exitNode());
+   ASMJIT_PROPAGATE(cc()->emitEpilog(frame));
+
+   return kErrorOk;
+ }
+
+ // BaseRAPass - Rewriter
+ // =====================
+
+ Error BaseRAPass::rewrite() noexcept {
+   return _rewrite(_func, _stop);
+ }
+
+ // BaseRAPass - Logging
+ // ====================
+
+ #ifndef ASMJIT_NO_LOGGING
+ static void RAPass_formatLiveness(BaseRAPass* pass, String& sb, const RAInst* raInst) noexcept {
+   const RATiedReg* tiedRegs = raInst->tiedRegs();
+   uint32_t tiedCount = raInst->tiedCount();
+
+   for (uint32_t i = 0; i < tiedCount; i++) {
+     const RATiedReg& tiedReg = tiedRegs[i];
+
+     if (i != 0)
+       sb.append(' ');
+
+     sb.appendFormat("%s{", pass->workRegById(tiedReg.workId())->name());
+     sb.append(tiedReg.isReadWrite() ? 'X' :
+               tiedReg.isRead()      ? 'R' :
+               tiedReg.isWrite()     ? 'W' : '?');
+
+     if (tiedReg.isLeadConsecutive())
+       sb.appendFormat("|Lead[%u]", tiedReg.consecutiveData() + 1u);
+
+     if (tiedReg.hasUseId())
+       sb.appendFormat("|Use=%u", tiedReg.useId());
+     else if (tiedReg.isUse())
+       sb.append("|Use");
+
+     if (tiedReg.isUseConsecutive() && !tiedReg.isLeadConsecutive())
+       sb.appendFormat("+%u", tiedReg.consecutiveData());
+
+     if (tiedReg.hasOutId())
+       sb.appendFormat("|Out=%u", tiedReg.outId());
+     else if (tiedReg.isOut())
+       sb.append("|Out");
+
+     if (tiedReg.isOutConsecutive() && !tiedReg.isLeadConsecutive())
+       sb.appendFormat("+%u", tiedReg.consecutiveData());
+
+     if (tiedReg.isLast())
+       sb.append("|Last");
+
+     if (tiedReg.isKill())
+       sb.append("|Kill");
+
+     sb.append("}");
+   }
+ }
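With these flags, an annotated instruction produced by `annotateCode()` below might read like this (hypothetical virtual-register names and ids, for illustration):

    mov v3, v1                              | v3{W|Out=2} v1{R|Use=1|Last|Kill}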
+
+ ASMJIT_FAVOR_SIZE Error BaseRAPass::annotateCode() noexcept {
+   StringTmp<1024> sb;
+
+   for (const RABlock* block : _blocks) {
+     BaseNode* node = block->first();
+     if (!node) continue;
+
+     BaseNode* last = block->last();
+     for (;;) {
+       sb.clear();
+       Formatter::formatNode(sb, _formatOptions, cc(), node);
+
+       if (hasDiagnosticOption(DiagnosticOptions::kRADebugLiveness) && node->isInst() && node->hasPassData()) {
+         const RAInst* raInst = node->passData<RAInst>();
+         if (raInst->tiedCount() > 0) {
+           sb.padEnd(40);
+           sb.append(" | ");
+           RAPass_formatLiveness(this, sb, raInst);
+         }
+       }
+
+       node->setInlineComment(static_cast<char*>(cc()->_dataZone.dup(sb.data(), sb.size(), true)));
+       if (node == last)
+         break;
+       node = node->next();
+     }
+   }
+
+   return kErrorOk;
+ }
+
+ ASMJIT_FAVOR_SIZE Error BaseRAPass::_dumpBlockIds(String& sb, const RABlocks& blocks) noexcept {
+   for (uint32_t i = 0, size = blocks.size(); i < size; i++) {
+     const RABlock* block = blocks[i];
+     if (i != 0)
+       ASMJIT_PROPAGATE(sb.appendFormat(", #%u", block->blockId()));
+     else
+       ASMJIT_PROPAGATE(sb.appendFormat("#%u", block->blockId()));
+   }
+   return kErrorOk;
+ }
+
+ ASMJIT_FAVOR_SIZE Error BaseRAPass::_dumpBlockLiveness(String& sb, const RABlock* block) noexcept {
+   for (uint32_t liveType = 0; liveType < RABlock::kLiveCount; liveType++) {
+     const char* bitsName = liveType == RABlock::kLiveIn  ? "IN  " :
+                            liveType == RABlock::kLiveOut ? "OUT " :
+                            liveType == RABlock::kLiveGen ? "GEN " : "KILL";
+
+     const ZoneBitVector& bits = block->_liveBits[liveType];
+     uint32_t size = bits.size();
+     ASMJIT_ASSERT(size <= workRegCount());
+
+     uint32_t n = 0;
+     for (uint32_t workId = 0; workId < size; workId++) {
+       if (bits.bitAt(workId)) {
+         RAWorkReg* wReg = workRegById(workId);
+
+         if (!n)
+           sb.appendFormat("    %s [", bitsName);
+         else
+           sb.append(", ");
+
+         sb.append(wReg->name());
+         n++;
+       }
+     }
+
+     if (n)
+       sb.append("]\n");
+   }
+
+   return kErrorOk;
+ }
+
+ ASMJIT_FAVOR_SIZE Error BaseRAPass::_dumpLiveSpans(String& sb) noexcept {
+   uint32_t numWorkRegs = _workRegs.size();
+   uint32_t maxSize = _maxWorkRegNameSize;
+
+   for (uint32_t workId = 0; workId < numWorkRegs; workId++) {
+     RAWorkReg* workReg = _workRegs[workId];
+
+     sb.append("  ");
+
+     size_t oldSize = sb.size();
+     sb.append(workReg->name());
+     sb.padEnd(oldSize + maxSize);
+
+     RALiveStats& stats = workReg->liveStats();
+     sb.appendFormat(" {id:%04u width: %-4u freq: %0.4f priority=%0.4f}",
+                     workReg->virtId(),
+                     stats.width(),
+                     stats.freq(),
+                     stats.priority());
+     sb.append(": ");
+
+     LiveRegSpans& liveSpans = workReg->liveSpans();
+     for (uint32_t x = 0; x < liveSpans.size(); x++) {
+       const LiveRegSpan& liveSpan = liveSpans[x];
+       if (x)
+         sb.append(", ");
+       sb.appendFormat("[%u:%u]", liveSpan.a, liveSpan.b);
+     }
+
+     sb.append('\n');
+   }
+
+   return kErrorOk;
+ }
+ #endif
+
+ ASMJIT_END_NAMESPACE
+
+ #endif // !ASMJIT_NO_COMPILER