asmjit 0.2.0 → 0.2.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (201) hide show
  1. checksums.yaml +4 -4
  2. data/Gemfile.lock +1 -1
  3. data/asmjit.gemspec +1 -1
  4. data/ext/asmjit/asmjit/.editorconfig +10 -0
  5. data/ext/asmjit/asmjit/.github/FUNDING.yml +1 -0
  6. data/ext/asmjit/asmjit/.github/workflows/build-config.json +47 -0
  7. data/ext/asmjit/asmjit/.github/workflows/build.yml +156 -0
  8. data/ext/asmjit/asmjit/.gitignore +6 -0
  9. data/ext/asmjit/asmjit/CMakeLists.txt +611 -0
  10. data/ext/asmjit/asmjit/LICENSE.md +17 -0
  11. data/ext/asmjit/asmjit/README.md +69 -0
  12. data/ext/asmjit/asmjit/src/asmjit/a64.h +62 -0
  13. data/ext/asmjit/asmjit/src/asmjit/arm/a64archtraits_p.h +81 -0
  14. data/ext/asmjit/asmjit/src/asmjit/arm/a64assembler.cpp +5115 -0
  15. data/ext/asmjit/asmjit/src/asmjit/arm/a64assembler.h +72 -0
  16. data/ext/asmjit/asmjit/src/asmjit/arm/a64builder.cpp +51 -0
  17. data/ext/asmjit/asmjit/src/asmjit/arm/a64builder.h +57 -0
  18. data/ext/asmjit/asmjit/src/asmjit/arm/a64compiler.cpp +60 -0
  19. data/ext/asmjit/asmjit/src/asmjit/arm/a64compiler.h +247 -0
  20. data/ext/asmjit/asmjit/src/asmjit/arm/a64emithelper.cpp +464 -0
  21. data/ext/asmjit/asmjit/src/asmjit/arm/a64emithelper_p.h +50 -0
  22. data/ext/asmjit/asmjit/src/asmjit/arm/a64emitter.h +1228 -0
  23. data/ext/asmjit/asmjit/src/asmjit/arm/a64formatter.cpp +298 -0
  24. data/ext/asmjit/asmjit/src/asmjit/arm/a64formatter_p.h +59 -0
  25. data/ext/asmjit/asmjit/src/asmjit/arm/a64func.cpp +189 -0
  26. data/ext/asmjit/asmjit/src/asmjit/arm/a64func_p.h +33 -0
  27. data/ext/asmjit/asmjit/src/asmjit/arm/a64globals.h +1894 -0
  28. data/ext/asmjit/asmjit/src/asmjit/arm/a64instapi.cpp +278 -0
  29. data/ext/asmjit/asmjit/src/asmjit/arm/a64instapi_p.h +41 -0
  30. data/ext/asmjit/asmjit/src/asmjit/arm/a64instdb.cpp +1957 -0
  31. data/ext/asmjit/asmjit/src/asmjit/arm/a64instdb.h +74 -0
  32. data/ext/asmjit/asmjit/src/asmjit/arm/a64instdb_p.h +876 -0
  33. data/ext/asmjit/asmjit/src/asmjit/arm/a64operand.cpp +85 -0
  34. data/ext/asmjit/asmjit/src/asmjit/arm/a64operand.h +312 -0
  35. data/ext/asmjit/asmjit/src/asmjit/arm/a64rapass.cpp +852 -0
  36. data/ext/asmjit/asmjit/src/asmjit/arm/a64rapass_p.h +105 -0
  37. data/ext/asmjit/asmjit/src/asmjit/arm/a64utils.h +179 -0
  38. data/ext/asmjit/asmjit/src/asmjit/arm/armformatter.cpp +143 -0
  39. data/ext/asmjit/asmjit/src/asmjit/arm/armformatter_p.h +44 -0
  40. data/ext/asmjit/asmjit/src/asmjit/arm/armglobals.h +21 -0
  41. data/ext/asmjit/asmjit/src/asmjit/arm/armoperand.h +621 -0
  42. data/ext/asmjit/asmjit/src/asmjit/arm.h +62 -0
  43. data/ext/asmjit/asmjit/src/asmjit/asmjit-scope-begin.h +17 -0
  44. data/ext/asmjit/asmjit/src/asmjit/asmjit-scope-end.h +9 -0
  45. data/ext/asmjit/asmjit/src/asmjit/asmjit.h +33 -0
  46. data/ext/asmjit/asmjit/src/asmjit/core/api-build_p.h +55 -0
  47. data/ext/asmjit/asmjit/src/asmjit/core/api-config.h +613 -0
  48. data/ext/asmjit/asmjit/src/asmjit/core/archcommons.h +229 -0
  49. data/ext/asmjit/asmjit/src/asmjit/core/archtraits.cpp +160 -0
  50. data/ext/asmjit/asmjit/src/asmjit/core/archtraits.h +290 -0
  51. data/ext/asmjit/asmjit/src/asmjit/core/assembler.cpp +406 -0
  52. data/ext/asmjit/asmjit/src/asmjit/core/assembler.h +129 -0
  53. data/ext/asmjit/asmjit/src/asmjit/core/builder.cpp +889 -0
  54. data/ext/asmjit/asmjit/src/asmjit/core/builder.h +1391 -0
  55. data/ext/asmjit/asmjit/src/asmjit/core/codebuffer.h +113 -0
  56. data/ext/asmjit/asmjit/src/asmjit/core/codeholder.cpp +1149 -0
  57. data/ext/asmjit/asmjit/src/asmjit/core/codeholder.h +1035 -0
  58. data/ext/asmjit/asmjit/src/asmjit/core/codewriter.cpp +175 -0
  59. data/ext/asmjit/asmjit/src/asmjit/core/codewriter_p.h +179 -0
  60. data/ext/asmjit/asmjit/src/asmjit/core/compiler.cpp +582 -0
  61. data/ext/asmjit/asmjit/src/asmjit/core/compiler.h +737 -0
  62. data/ext/asmjit/asmjit/src/asmjit/core/compilerdefs.h +173 -0
  63. data/ext/asmjit/asmjit/src/asmjit/core/constpool.cpp +363 -0
  64. data/ext/asmjit/asmjit/src/asmjit/core/constpool.h +250 -0
  65. data/ext/asmjit/asmjit/src/asmjit/core/cpuinfo.cpp +1162 -0
  66. data/ext/asmjit/asmjit/src/asmjit/core/cpuinfo.h +813 -0
  67. data/ext/asmjit/asmjit/src/asmjit/core/emithelper.cpp +323 -0
  68. data/ext/asmjit/asmjit/src/asmjit/core/emithelper_p.h +58 -0
  69. data/ext/asmjit/asmjit/src/asmjit/core/emitter.cpp +333 -0
  70. data/ext/asmjit/asmjit/src/asmjit/core/emitter.h +741 -0
  71. data/ext/asmjit/asmjit/src/asmjit/core/emitterutils.cpp +129 -0
  72. data/ext/asmjit/asmjit/src/asmjit/core/emitterutils_p.h +89 -0
  73. data/ext/asmjit/asmjit/src/asmjit/core/environment.cpp +46 -0
  74. data/ext/asmjit/asmjit/src/asmjit/core/environment.h +508 -0
  75. data/ext/asmjit/asmjit/src/asmjit/core/errorhandler.cpp +14 -0
  76. data/ext/asmjit/asmjit/src/asmjit/core/errorhandler.h +228 -0
  77. data/ext/asmjit/asmjit/src/asmjit/core/formatter.cpp +584 -0
  78. data/ext/asmjit/asmjit/src/asmjit/core/formatter.h +247 -0
  79. data/ext/asmjit/asmjit/src/asmjit/core/formatter_p.h +34 -0
  80. data/ext/asmjit/asmjit/src/asmjit/core/func.cpp +286 -0
  81. data/ext/asmjit/asmjit/src/asmjit/core/func.h +1445 -0
  82. data/ext/asmjit/asmjit/src/asmjit/core/funcargscontext.cpp +293 -0
  83. data/ext/asmjit/asmjit/src/asmjit/core/funcargscontext_p.h +199 -0
  84. data/ext/asmjit/asmjit/src/asmjit/core/globals.cpp +133 -0
  85. data/ext/asmjit/asmjit/src/asmjit/core/globals.h +393 -0
  86. data/ext/asmjit/asmjit/src/asmjit/core/inst.cpp +113 -0
  87. data/ext/asmjit/asmjit/src/asmjit/core/inst.h +772 -0
  88. data/ext/asmjit/asmjit/src/asmjit/core/jitallocator.cpp +1242 -0
  89. data/ext/asmjit/asmjit/src/asmjit/core/jitallocator.h +261 -0
  90. data/ext/asmjit/asmjit/src/asmjit/core/jitruntime.cpp +80 -0
  91. data/ext/asmjit/asmjit/src/asmjit/core/jitruntime.h +89 -0
  92. data/ext/asmjit/asmjit/src/asmjit/core/logger.cpp +69 -0
  93. data/ext/asmjit/asmjit/src/asmjit/core/logger.h +198 -0
  94. data/ext/asmjit/asmjit/src/asmjit/core/misc_p.h +33 -0
  95. data/ext/asmjit/asmjit/src/asmjit/core/operand.cpp +132 -0
  96. data/ext/asmjit/asmjit/src/asmjit/core/operand.h +1611 -0
  97. data/ext/asmjit/asmjit/src/asmjit/core/osutils.cpp +84 -0
  98. data/ext/asmjit/asmjit/src/asmjit/core/osutils.h +61 -0
  99. data/ext/asmjit/asmjit/src/asmjit/core/osutils_p.h +68 -0
  100. data/ext/asmjit/asmjit/src/asmjit/core/raassignment_p.h +418 -0
  101. data/ext/asmjit/asmjit/src/asmjit/core/rabuilders_p.h +612 -0
  102. data/ext/asmjit/asmjit/src/asmjit/core/radefs_p.h +1204 -0
  103. data/ext/asmjit/asmjit/src/asmjit/core/ralocal.cpp +1166 -0
  104. data/ext/asmjit/asmjit/src/asmjit/core/ralocal_p.h +254 -0
  105. data/ext/asmjit/asmjit/src/asmjit/core/rapass.cpp +1969 -0
  106. data/ext/asmjit/asmjit/src/asmjit/core/rapass_p.h +1183 -0
  107. data/ext/asmjit/asmjit/src/asmjit/core/rastack.cpp +184 -0
  108. data/ext/asmjit/asmjit/src/asmjit/core/rastack_p.h +171 -0
  109. data/ext/asmjit/asmjit/src/asmjit/core/string.cpp +559 -0
  110. data/ext/asmjit/asmjit/src/asmjit/core/string.h +372 -0
  111. data/ext/asmjit/asmjit/src/asmjit/core/support.cpp +494 -0
  112. data/ext/asmjit/asmjit/src/asmjit/core/support.h +1773 -0
  113. data/ext/asmjit/asmjit/src/asmjit/core/target.cpp +14 -0
  114. data/ext/asmjit/asmjit/src/asmjit/core/target.h +53 -0
  115. data/ext/asmjit/asmjit/src/asmjit/core/type.cpp +74 -0
  116. data/ext/asmjit/asmjit/src/asmjit/core/type.h +419 -0
  117. data/ext/asmjit/asmjit/src/asmjit/core/virtmem.cpp +722 -0
  118. data/ext/asmjit/asmjit/src/asmjit/core/virtmem.h +242 -0
  119. data/ext/asmjit/asmjit/src/asmjit/core/zone.cpp +353 -0
  120. data/ext/asmjit/asmjit/src/asmjit/core/zone.h +615 -0
  121. data/ext/asmjit/asmjit/src/asmjit/core/zonehash.cpp +309 -0
  122. data/ext/asmjit/asmjit/src/asmjit/core/zonehash.h +186 -0
  123. data/ext/asmjit/asmjit/src/asmjit/core/zonelist.cpp +163 -0
  124. data/ext/asmjit/asmjit/src/asmjit/core/zonelist.h +209 -0
  125. data/ext/asmjit/asmjit/src/asmjit/core/zonestack.cpp +176 -0
  126. data/ext/asmjit/asmjit/src/asmjit/core/zonestack.h +239 -0
  127. data/ext/asmjit/asmjit/src/asmjit/core/zonestring.h +120 -0
  128. data/ext/asmjit/asmjit/src/asmjit/core/zonetree.cpp +99 -0
  129. data/ext/asmjit/asmjit/src/asmjit/core/zonetree.h +380 -0
  130. data/ext/asmjit/asmjit/src/asmjit/core/zonevector.cpp +356 -0
  131. data/ext/asmjit/asmjit/src/asmjit/core/zonevector.h +690 -0
  132. data/ext/asmjit/asmjit/src/asmjit/core.h +1861 -0
  133. data/ext/asmjit/asmjit/src/asmjit/x86/x86archtraits_p.h +148 -0
  134. data/ext/asmjit/asmjit/src/asmjit/x86/x86assembler.cpp +5110 -0
  135. data/ext/asmjit/asmjit/src/asmjit/x86/x86assembler.h +685 -0
  136. data/ext/asmjit/asmjit/src/asmjit/x86/x86builder.cpp +52 -0
  137. data/ext/asmjit/asmjit/src/asmjit/x86/x86builder.h +351 -0
  138. data/ext/asmjit/asmjit/src/asmjit/x86/x86compiler.cpp +61 -0
  139. data/ext/asmjit/asmjit/src/asmjit/x86/x86compiler.h +721 -0
  140. data/ext/asmjit/asmjit/src/asmjit/x86/x86emithelper.cpp +619 -0
  141. data/ext/asmjit/asmjit/src/asmjit/x86/x86emithelper_p.h +60 -0
  142. data/ext/asmjit/asmjit/src/asmjit/x86/x86emitter.h +4315 -0
  143. data/ext/asmjit/asmjit/src/asmjit/x86/x86formatter.cpp +944 -0
  144. data/ext/asmjit/asmjit/src/asmjit/x86/x86formatter_p.h +58 -0
  145. data/ext/asmjit/asmjit/src/asmjit/x86/x86func.cpp +503 -0
  146. data/ext/asmjit/asmjit/src/asmjit/x86/x86func_p.h +33 -0
  147. data/ext/asmjit/asmjit/src/asmjit/x86/x86globals.h +2169 -0
  148. data/ext/asmjit/asmjit/src/asmjit/x86/x86instapi.cpp +1732 -0
  149. data/ext/asmjit/asmjit/src/asmjit/x86/x86instapi_p.h +41 -0
  150. data/ext/asmjit/asmjit/src/asmjit/x86/x86instdb.cpp +4427 -0
  151. data/ext/asmjit/asmjit/src/asmjit/x86/x86instdb.h +563 -0
  152. data/ext/asmjit/asmjit/src/asmjit/x86/x86instdb_p.h +311 -0
  153. data/ext/asmjit/asmjit/src/asmjit/x86/x86opcode_p.h +436 -0
  154. data/ext/asmjit/asmjit/src/asmjit/x86/x86operand.cpp +231 -0
  155. data/ext/asmjit/asmjit/src/asmjit/x86/x86operand.h +1085 -0
  156. data/ext/asmjit/asmjit/src/asmjit/x86/x86rapass.cpp +1509 -0
  157. data/ext/asmjit/asmjit/src/asmjit/x86/x86rapass_p.h +94 -0
  158. data/ext/asmjit/asmjit/src/asmjit/x86.h +93 -0
  159. data/ext/asmjit/asmjit/src/asmjit.natvis +245 -0
  160. data/ext/asmjit/asmjit/test/asmjit_test_assembler.cpp +84 -0
  161. data/ext/asmjit/asmjit/test/asmjit_test_assembler.h +85 -0
  162. data/ext/asmjit/asmjit/test/asmjit_test_assembler_a64.cpp +4006 -0
  163. data/ext/asmjit/asmjit/test/asmjit_test_assembler_x64.cpp +17833 -0
  164. data/ext/asmjit/asmjit/test/asmjit_test_assembler_x86.cpp +8300 -0
  165. data/ext/asmjit/asmjit/test/asmjit_test_compiler.cpp +253 -0
  166. data/ext/asmjit/asmjit/test/asmjit_test_compiler.h +73 -0
  167. data/ext/asmjit/asmjit/test/asmjit_test_compiler_a64.cpp +690 -0
  168. data/ext/asmjit/asmjit/test/asmjit_test_compiler_x86.cpp +4317 -0
  169. data/ext/asmjit/asmjit/test/asmjit_test_emitters.cpp +197 -0
  170. data/ext/asmjit/asmjit/test/asmjit_test_instinfo.cpp +181 -0
  171. data/ext/asmjit/asmjit/test/asmjit_test_misc.h +257 -0
  172. data/ext/asmjit/asmjit/test/asmjit_test_perf.cpp +62 -0
  173. data/ext/asmjit/asmjit/test/asmjit_test_perf.h +61 -0
  174. data/ext/asmjit/asmjit/test/asmjit_test_perf_a64.cpp +699 -0
  175. data/ext/asmjit/asmjit/test/asmjit_test_perf_x86.cpp +5032 -0
  176. data/ext/asmjit/asmjit/test/asmjit_test_unit.cpp +172 -0
  177. data/ext/asmjit/asmjit/test/asmjit_test_x86_sections.cpp +172 -0
  178. data/ext/asmjit/asmjit/test/asmjitutils.h +38 -0
  179. data/ext/asmjit/asmjit/test/broken.cpp +312 -0
  180. data/ext/asmjit/asmjit/test/broken.h +148 -0
  181. data/ext/asmjit/asmjit/test/cmdline.h +61 -0
  182. data/ext/asmjit/asmjit/test/performancetimer.h +41 -0
  183. data/ext/asmjit/asmjit/tools/configure-makefiles.sh +13 -0
  184. data/ext/asmjit/asmjit/tools/configure-ninja.sh +13 -0
  185. data/ext/asmjit/asmjit/tools/configure-sanitizers.sh +13 -0
  186. data/ext/asmjit/asmjit/tools/configure-vs2019-x64.bat +2 -0
  187. data/ext/asmjit/asmjit/tools/configure-vs2019-x86.bat +2 -0
  188. data/ext/asmjit/asmjit/tools/configure-vs2022-x64.bat +2 -0
  189. data/ext/asmjit/asmjit/tools/configure-vs2022-x86.bat +2 -0
  190. data/ext/asmjit/asmjit/tools/configure-xcode.sh +8 -0
  191. data/ext/asmjit/asmjit/tools/enumgen.js +417 -0
  192. data/ext/asmjit/asmjit/tools/enumgen.sh +3 -0
  193. data/ext/asmjit/asmjit/tools/tablegen-arm.js +365 -0
  194. data/ext/asmjit/asmjit/tools/tablegen-arm.sh +3 -0
  195. data/ext/asmjit/asmjit/tools/tablegen-x86.js +2638 -0
  196. data/ext/asmjit/asmjit/tools/tablegen-x86.sh +3 -0
  197. data/ext/asmjit/asmjit/tools/tablegen.js +947 -0
  198. data/ext/asmjit/asmjit/tools/tablegen.sh +4 -0
  199. data/ext/asmjit/asmjit.cc +18 -0
  200. data/lib/asmjit/version.rb +1 -1
  201. metadata +197 -2
@@ -0,0 +1,1969 @@
1
+ // This file is part of AsmJit project <https://asmjit.com>
2
+ //
3
+ // See asmjit.h or LICENSE.md for license and copyright information
4
+ // SPDX-License-Identifier: Zlib
5
+
6
+ #include "../core/api-build_p.h"
7
+ #ifndef ASMJIT_NO_COMPILER
8
+
9
+ #include "../core/formatter.h"
10
+ #include "../core/ralocal_p.h"
11
+ #include "../core/rapass_p.h"
12
+ #include "../core/support.h"
13
+ #include "../core/type.h"
14
+ #include "../core/zonestack.h"
15
+
16
+ ASMJIT_BEGIN_NAMESPACE
17
+
18
+ // RABlock - Control Flow
19
+ // ======================
20
+
21
+ Error RABlock::appendSuccessor(RABlock* successor) noexcept {
22
+ RABlock* predecessor = this;
23
+
24
+ if (predecessor->hasSuccessor(successor))
25
+ return kErrorOk;
26
+
27
+ ASMJIT_PROPAGATE(successor->_predecessors.willGrow(allocator()));
28
+ ASMJIT_PROPAGATE(predecessor->_successors.willGrow(allocator()));
29
+
30
+ predecessor->_successors.appendUnsafe(successor);
31
+ successor->_predecessors.appendUnsafe(predecessor);
32
+
33
+ return kErrorOk;
34
+ }
35
+
36
+ Error RABlock::prependSuccessor(RABlock* successor) noexcept {
37
+ RABlock* predecessor = this;
38
+
39
+ if (predecessor->hasSuccessor(successor))
40
+ return kErrorOk;
41
+
42
+ ASMJIT_PROPAGATE(successor->_predecessors.willGrow(allocator()));
43
+ ASMJIT_PROPAGATE(predecessor->_successors.willGrow(allocator()));
44
+
45
+ predecessor->_successors.prependUnsafe(successor);
46
+ successor->_predecessors.prependUnsafe(predecessor);
47
+
48
+ return kErrorOk;
49
+ }
50
+
51
+ // BaseRAPass - Construction & Destruction
52
+ // =======================================
53
+
54
+ BaseRAPass::BaseRAPass() noexcept : FuncPass("BaseRAPass") {}
55
+ BaseRAPass::~BaseRAPass() noexcept {}
56
+
57
+ // BaseRAPass - RunOnFunction
58
+ // ==========================
59
+
60
+ static void BaseRAPass_reset(BaseRAPass* self, FuncDetail* funcDetail) noexcept {
61
+ ZoneAllocator* allocator = self->allocator();
62
+
63
+ self->_blocks.reset();
64
+ self->_exits.reset();
65
+ self->_pov.reset();
66
+ self->_workRegs.reset();
67
+ self->_instructionCount = 0;
68
+ self->_createdBlockCount = 0;
69
+
70
+ self->_sharedAssignments.reset();
71
+ self->_lastTimestamp = 0;
72
+
73
+ self->_archTraits = nullptr;
74
+ self->_physRegIndex.reset();
75
+ self->_physRegCount.reset();
76
+ self->_physRegTotal = 0;
77
+ self->_scratchRegIndexes.fill(BaseReg::kIdBad);
78
+
79
+ self->_availableRegs.reset();
80
+ self->_availableRegCount.reset();
81
+ self->_clobberedRegs.reset();
82
+
83
+ self->_workRegs.reset();
84
+ self->_workRegsOfGroup.forEach([](RAWorkRegs& regs) { regs.reset(); });
85
+ self->_strategy.forEach([](RAStrategy& strategy) { strategy.reset(); });
86
+ self->_globalLiveSpans.fill(nullptr);
87
+ self->_globalMaxLiveCount.reset();
88
+ self->_temporaryMem.reset();
89
+
90
+ self->_stackAllocator.reset(allocator);
91
+ self->_argsAssignment.reset(funcDetail);
92
+ self->_numStackArgsToStackSlots = 0;
93
+ self->_maxWorkRegNameSize = 0;
94
+ }
95
+
96
+ static void BaseRAPass_resetVirtRegData(BaseRAPass* self) noexcept {
97
+ for (RAWorkReg* wReg : self->_workRegs) {
98
+ VirtReg* vReg = wReg->virtReg();
99
+
100
+ // Update the information regarding the stack of the virtual register.
101
+ if (wReg->hasStackSlot()) {
102
+ RAStackSlot* slot = wReg->stackSlot();
103
+ vReg->assignStackSlot(slot->offset());
104
+ }
105
+
106
+ // Reset work reg association so it cannot be used by accident (RAWorkReg data will be destroyed).
107
+ vReg->_workReg = nullptr;
108
+ }
109
+ }
110
+
111
+ Error BaseRAPass::runOnFunction(Zone* zone, Logger* logger, FuncNode* func) {
112
+ _allocator.reset(zone);
113
+
114
+ #ifndef ASMJIT_NO_LOGGING
115
+ _logger = logger;
116
+ _formatOptions.reset();
117
+ _diagnosticOptions = DiagnosticOptions::kNone;
118
+
119
+ if (logger) {
120
+ _formatOptions = logger->options();
121
+ _diagnosticOptions = _cb->diagnosticOptions();
122
+ }
123
+ #else
124
+ DebugUtils::unused(logger);
125
+ #endif
126
+
127
+ // Initialize all core structures to use `zone` and `func`.
128
+ BaseNode* end = func->endNode();
129
+ _func = func;
130
+ _stop = end->next();
131
+ _extraBlock = end;
132
+
133
+ BaseRAPass_reset(this, &_func->_funcDetail);
134
+
135
+ // Initialize architecture-specific members.
136
+ onInit();
137
+
138
+ // Perform all allocation steps required.
139
+ Error err = onPerformAllSteps();
140
+
141
+ // Must be called regardless of the allocation status.
142
+ onDone();
143
+
144
+ // Reset possible connections introduced by the register allocator.
145
+ BaseRAPass_resetVirtRegData(this);
146
+
147
+ // Reset all core structures and everything that depends on the passed `Zone`.
148
+ BaseRAPass_reset(this, nullptr);
149
+ _allocator.reset(nullptr);
150
+
151
+ #ifndef ASMJIT_NO_LOGGING
152
+ _logger = nullptr;
153
+ _formatOptions.reset();
154
+ _diagnosticOptions = DiagnosticOptions::kNone;
155
+ #endif
156
+
157
+ _func = nullptr;
158
+ _stop = nullptr;
159
+ _extraBlock = nullptr;
160
+
161
+ // Reset `Zone` as nothing should persist between `runOnFunction()` calls.
162
+ zone->reset();
163
+
164
+ // We alter the compiler cursor, because it doesn't make sense to reference it after the compilation - some
165
+ // nodes may disappear and the old cursor can go out anyway.
166
+ cc()->_setCursor(cc()->lastNode());
167
+
168
+ return err;
169
+ }
170
+
171
+ Error BaseRAPass::onPerformAllSteps() noexcept {
172
+ ASMJIT_PROPAGATE(buildCFG());
173
+ ASMJIT_PROPAGATE(buildCFGViews());
174
+ ASMJIT_PROPAGATE(removeUnreachableCode());
175
+
176
+ ASMJIT_PROPAGATE(buildCFGDominators());
177
+ ASMJIT_PROPAGATE(buildLiveness());
178
+ ASMJIT_PROPAGATE(assignArgIndexToWorkRegs());
179
+
180
+ #ifndef ASMJIT_NO_LOGGING
181
+ if (hasDiagnosticOption(DiagnosticOptions::kRAAnnotate))
182
+ ASMJIT_PROPAGATE(annotateCode());
183
+ #endif
184
+
185
+ ASMJIT_PROPAGATE(runGlobalAllocator());
186
+ ASMJIT_PROPAGATE(runLocalAllocator());
187
+
188
+ ASMJIT_PROPAGATE(updateStackFrame());
189
+ ASMJIT_PROPAGATE(insertPrologEpilog());
190
+
191
+ ASMJIT_PROPAGATE(rewrite());
192
+
193
+ return kErrorOk;
194
+ }
195
+
196
+ // BaseRAPass - CFG - Basic Block Management
197
+ // =========================================
198
+
199
+ RABlock* BaseRAPass::newBlock(BaseNode* initialNode) noexcept {
200
+ RABlock* block = zone()->newT<RABlock>(this);
201
+ if (ASMJIT_UNLIKELY(!block))
202
+ return nullptr;
203
+
204
+ block->setFirst(initialNode);
205
+ block->setLast(initialNode);
206
+
207
+ _createdBlockCount++;
208
+ return block;
209
+ }
210
+
211
+ RABlock* BaseRAPass::newBlockOrExistingAt(LabelNode* cbLabel, BaseNode** stoppedAt) noexcept {
212
+ if (cbLabel->hasPassData())
213
+ return cbLabel->passData<RABlock>();
214
+
215
+ FuncNode* func = this->func();
216
+ BaseNode* node = cbLabel->prev();
217
+ RABlock* block = nullptr;
218
+
219
+ // Try to find some label, but terminate the loop on any code. We try hard to coalesce code that contains two
220
+ // consecutive labels or a combination of non-code nodes between 2 or more labels.
221
+ //
222
+ // Possible cases that would share the same basic block:
223
+ //
224
+ // 1. Two or more consecutive labels:
225
+ // Label1:
226
+ // Label2:
227
+ //
228
+ // 2. Two or more labels separated by non-code nodes:
229
+ // Label1:
230
+ // ; Some comment...
231
+ // .align 16
232
+ // Label2:
233
+ size_t nPendingLabels = 0;
234
+
235
+ while (node) {
236
+ if (node->type() == NodeType::kLabel) {
237
+ // Function has a different NodeType, just make sure this was not messed up as we must never associate
238
+ // BasicBlock with a `func` itself.
239
+ ASMJIT_ASSERT(node != func);
240
+
241
+ block = node->passData<RABlock>();
242
+ if (block) {
243
+ // Exit node has always a block associated with it. If we went here it means that `cbLabel` passed here
244
+ // is after the end of the function and cannot be merged with the function exit block.
245
+ if (node == func->exitNode())
246
+ block = nullptr;
247
+ break;
248
+ }
249
+
250
+ nPendingLabels++;
251
+ }
252
+ else if (node->type() == NodeType::kAlign) {
253
+ // Align node is fine.
254
+ }
255
+ else {
256
+ break;
257
+ }
258
+
259
+ node = node->prev();
260
+ }
261
+
262
+ if (stoppedAt)
263
+ *stoppedAt = node;
264
+
265
+ if (!block) {
266
+ block = newBlock();
267
+ if (ASMJIT_UNLIKELY(!block))
268
+ return nullptr;
269
+ }
270
+
271
+ cbLabel->setPassData<RABlock>(block);
272
+ node = cbLabel;
273
+
274
+ while (nPendingLabels) {
275
+ node = node->prev();
276
+ for (;;) {
277
+ if (node->type() == NodeType::kLabel) {
278
+ node->setPassData<RABlock>(block);
279
+ nPendingLabels--;
280
+ break;
281
+ }
282
+
283
+ node = node->prev();
284
+ ASMJIT_ASSERT(node != nullptr);
285
+ }
286
+ }
287
+
288
+ if (!block->first()) {
289
+ block->setFirst(node);
290
+ block->setLast(cbLabel);
291
+ }
292
+
293
+ return block;
294
+ }
295
+
296
+ Error BaseRAPass::addBlock(RABlock* block) noexcept {
297
+ ASMJIT_PROPAGATE(_blocks.willGrow(allocator()));
298
+
299
+ block->_blockId = blockCount();
300
+ _blocks.appendUnsafe(block);
301
+ return kErrorOk;
302
+ }
303
+
304
+ // BaseRAPass - CFG - Build
305
+ // ========================
306
+
307
+ Error BaseRAPass::initSharedAssignments(const ZoneVector<uint32_t>& sharedAssignmentsMap) noexcept {
308
+ if (sharedAssignmentsMap.empty())
309
+ return kErrorOk;
310
+
311
+ uint32_t count = 0;
312
+ for (RABlock* block : _blocks) {
313
+ if (block->hasSharedAssignmentId()) {
314
+ uint32_t sharedAssignmentId = sharedAssignmentsMap[block->sharedAssignmentId()];
315
+ block->setSharedAssignmentId(sharedAssignmentId);
316
+ count = Support::max(count, sharedAssignmentId + 1);
317
+ }
318
+ }
319
+
320
+ ASMJIT_PROPAGATE(_sharedAssignments.resize(allocator(), count));
321
+
322
+ // Aggregate all entry scratch GP regs from blocks of the same assignment to the assignment itself. It will then be
323
+ // used instead of RABlock's own scratch regs mask, as shared assignments have precedence.
324
+ for (RABlock* block : _blocks) {
325
+ if (block->hasJumpTable()) {
326
+ const RABlocks& successors = block->successors();
327
+ if (!successors.empty()) {
328
+ RABlock* firstSuccessor = successors[0];
329
+ // NOTE: Shared assignments connect all possible successors so we only need the first to propagate exit scratch
330
+ // GP registers.
331
+ ASMJIT_ASSERT(firstSuccessor->hasSharedAssignmentId());
332
+ RASharedAssignment& sa = _sharedAssignments[firstSuccessor->sharedAssignmentId()];
333
+ sa.addEntryScratchGpRegs(block->exitScratchGpRegs());
334
+ }
335
+ }
336
+ if (block->hasSharedAssignmentId()) {
337
+ RASharedAssignment& sa = _sharedAssignments[block->sharedAssignmentId()];
338
+ sa.addEntryScratchGpRegs(block->_entryScratchGpRegs);
339
+ }
340
+ }
341
+
342
+ return kErrorOk;
343
+ }
344
+
345
+ // BaseRAPass - CFG - Views Order
346
+ // ==============================
347
+
348
+ class RABlockVisitItem {
349
+ public:
350
+ inline RABlockVisitItem(RABlock* block, uint32_t index) noexcept
351
+ : _block(block),
352
+ _index(index) {}
353
+
354
+ inline RABlockVisitItem(const RABlockVisitItem& other) noexcept
355
+ : _block(other._block),
356
+ _index(other._index) {}
357
+
358
+ inline RABlockVisitItem& operator=(const RABlockVisitItem& other) noexcept = default;
359
+
360
+ inline RABlock* block() const noexcept { return _block; }
361
+ inline uint32_t index() const noexcept { return _index; }
362
+
363
+ RABlock* _block;
364
+ uint32_t _index;
365
+ };
366
+
367
+ Error BaseRAPass::buildCFGViews() noexcept {
368
+ #ifndef ASMJIT_NO_LOGGING
369
+ Logger* logger = getLoggerIf(DiagnosticOptions::kRADebugCFG);
370
+ ASMJIT_RA_LOG_FORMAT("[BuildCFGViews]\n");
371
+ #endif
372
+
373
+ uint32_t count = blockCount();
374
+ if (ASMJIT_UNLIKELY(!count)) return kErrorOk;
375
+
376
+ ASMJIT_PROPAGATE(_pov.reserve(allocator(), count));
377
+
378
+ ZoneStack<RABlockVisitItem> stack;
379
+ ASMJIT_PROPAGATE(stack.init(allocator()));
380
+
381
+ ZoneBitVector visited;
382
+ ASMJIT_PROPAGATE(visited.resize(allocator(), count));
383
+
384
+ RABlock* current = _blocks[0];
385
+ uint32_t i = 0;
386
+
387
+ for (;;) {
388
+ for (;;) {
389
+ if (i >= current->successors().size())
390
+ break;
391
+
392
+ // Skip if already visited.
393
+ RABlock* child = current->successors()[i++];
394
+ if (visited.bitAt(child->blockId()))
395
+ continue;
396
+
397
+ // Mark as visited to prevent visiting the same block multiple times.
398
+ visited.setBit(child->blockId(), true);
399
+
400
+ // Add the current block on the stack, we will get back to it later.
401
+ ASMJIT_PROPAGATE(stack.append(RABlockVisitItem(current, i)));
402
+ current = child;
403
+ i = 0;
404
+ }
405
+
406
+ current->makeReachable();
407
+ current->_povOrder = _pov.size();
408
+ _pov.appendUnsafe(current);
409
+
410
+ if (stack.empty())
411
+ break;
412
+
413
+ RABlockVisitItem top = stack.pop();
414
+ current = top.block();
415
+ i = top.index();
416
+ }
417
+
418
+ ASMJIT_RA_LOG_COMPLEX({
419
+ StringTmp<1024> sb;
420
+ for (RABlock* block : blocks()) {
421
+ sb.clear();
422
+ if (block->hasSuccessors()) {
423
+ sb.appendFormat(" #%u -> {", block->blockId());
424
+ _dumpBlockIds(sb, block->successors());
425
+ sb.append("}\n");
426
+ }
427
+ else {
428
+ sb.appendFormat(" #%u -> {Exit}\n", block->blockId());
429
+ }
430
+ logger->log(sb);
431
+ }
432
+ });
433
+
434
+ visited.release(allocator());
435
+ return kErrorOk;
436
+ }
437
+
438
+ // BaseRAPass - CFG - Dominators
439
+ // =============================
440
+
441
+ static ASMJIT_FORCE_INLINE RABlock* intersectBlocks(RABlock* b1, RABlock* b2) noexcept {
442
+ while (b1 != b2) {
443
+ while (b2->povOrder() > b1->povOrder()) b1 = b1->iDom();
444
+ while (b1->povOrder() > b2->povOrder()) b2 = b2->iDom();
445
+ }
446
+ return b1;
447
+ }
448
+
449
+ // Based on "A Simple, Fast Dominance Algorithm".
450
+ Error BaseRAPass::buildCFGDominators() noexcept {
451
+ #ifndef ASMJIT_NO_LOGGING
452
+ Logger* logger = getLoggerIf(DiagnosticOptions::kRADebugCFG);
453
+ ASMJIT_RA_LOG_FORMAT("[BuildCFGDominators]\n");
454
+ #endif
455
+
456
+ if (_blocks.empty())
457
+ return kErrorOk;
458
+
459
+ RABlock* entryBlock = this->entryBlock();
460
+ entryBlock->setIDom(entryBlock);
461
+
462
+ bool changed = true;
463
+ uint32_t nIters = 0;
464
+
465
+ while (changed) {
466
+ nIters++;
467
+ changed = false;
468
+
469
+ uint32_t i = _pov.size();
470
+ while (i) {
471
+ RABlock* block = _pov[--i];
472
+ if (block == entryBlock)
473
+ continue;
474
+
475
+ RABlock* iDom = nullptr;
476
+ const RABlocks& preds = block->predecessors();
477
+
478
+ uint32_t j = preds.size();
479
+ while (j) {
480
+ RABlock* p = preds[--j];
481
+ if (!p->iDom())
482
+ continue;
483
+ iDom = !iDom ? p : intersectBlocks(iDom, p);
484
+ }
485
+
486
+ if (block->iDom() != iDom) {
487
+ ASMJIT_ASSUME(iDom != nullptr);
488
+ ASMJIT_RA_LOG_FORMAT(" IDom of #%u -> #%u\n", block->blockId(), iDom->blockId());
489
+ block->setIDom(iDom);
490
+ changed = true;
491
+ }
492
+ }
493
+ }
494
+
495
+ ASMJIT_RA_LOG_FORMAT(" Done (%u iterations)\n", nIters);
496
+ return kErrorOk;
497
+ }
498
+
499
+ bool BaseRAPass::_strictlyDominates(const RABlock* a, const RABlock* b) const noexcept {
500
+ ASMJIT_ASSERT(a != nullptr); // There must be at least one block if this function is
501
+ ASMJIT_ASSERT(b != nullptr); // called, as both `a` and `b` must be valid blocks.
502
+ ASMJIT_ASSERT(a != b); // Checked by `dominates()` and `strictlyDominates()`.
503
+
504
+ // Nothing strictly dominates the entry block.
505
+ const RABlock* entryBlock = this->entryBlock();
506
+ if (a == entryBlock)
507
+ return false;
508
+
509
+ const RABlock* iDom = b->iDom();
510
+ while (iDom != a && iDom != entryBlock)
511
+ iDom = iDom->iDom();
512
+
513
+ return iDom != entryBlock;
514
+ }
515
+
516
+ const RABlock* BaseRAPass::_nearestCommonDominator(const RABlock* a, const RABlock* b) const noexcept {
517
+ ASMJIT_ASSERT(a != nullptr); // There must be at least one block if this function is
518
+ ASMJIT_ASSERT(b != nullptr); // called, as both `a` and `b` must be valid blocks.
519
+ ASMJIT_ASSERT(a != b); // Checked by `dominates()` and `properlyDominates()`.
520
+
521
+ if (a == b)
522
+ return a;
523
+
524
+ // If `a` strictly dominates `b` then `a` is the nearest common dominator.
525
+ if (_strictlyDominates(a, b))
526
+ return a;
527
+
528
+ // If `b` strictly dominates `a` then `b` is the nearest common dominator.
529
+ if (_strictlyDominates(b, a))
530
+ return b;
531
+
532
+ const RABlock* entryBlock = this->entryBlock();
533
+ uint64_t timestamp = nextTimestamp();
534
+
535
+ // Mark all A's dominators.
536
+ const RABlock* block = a->iDom();
537
+ while (block != entryBlock) {
538
+ block->setTimestamp(timestamp);
539
+ block = block->iDom();
540
+ }
541
+
542
+ // Check all B's dominators against marked dominators of A.
543
+ block = b->iDom();
544
+ while (block != entryBlock) {
545
+ if (block->hasTimestamp(timestamp))
546
+ return block;
547
+ block = block->iDom();
548
+ }
549
+
550
+ return entryBlock;
551
+ }
552
+
553
+ // BaseRAPass - CFG - Utilities
554
+ // ============================
555
+
556
+ Error BaseRAPass::removeUnreachableCode() noexcept {
557
+ uint32_t numAllBlocks = blockCount();
558
+ uint32_t numReachableBlocks = reachableBlockCount();
559
+
560
+ // All reachable -> nothing to do.
561
+ if (numAllBlocks == numReachableBlocks)
562
+ return kErrorOk;
563
+
564
+ #ifndef ASMJIT_NO_LOGGING
565
+ StringTmp<256> sb;
566
+ Logger* logger = getLoggerIf(DiagnosticOptions::kRADebugUnreachable);
567
+ ASMJIT_RA_LOG_FORMAT("[RemoveUnreachableCode - detected %u of %u unreachable blocks]\n", numAllBlocks - numReachableBlocks, numAllBlocks);
568
+ #endif
569
+
570
+ for (uint32_t i = 0; i < numAllBlocks; i++) {
571
+ RABlock* block = _blocks[i];
572
+ if (block->isReachable())
573
+ continue;
574
+
575
+ ASMJIT_RA_LOG_FORMAT(" Removing code from unreachable block {%u}\n", i);
576
+ BaseNode* first = block->first();
577
+ BaseNode* last = block->last();
578
+
579
+ BaseNode* beforeFirst = first->prev();
580
+ BaseNode* afterLast = last->next();
581
+
582
+ BaseNode* node = first;
583
+ while (node != afterLast) {
584
+ BaseNode* next = node->next();
585
+
586
+ if (node->isCode() || node->isRemovable()) {
587
+ #ifndef ASMJIT_NO_LOGGING
588
+ if (logger) {
589
+ sb.clear();
590
+ Formatter::formatNode(sb, _formatOptions, cc(), node);
591
+ logger->logf(" %s\n", sb.data());
592
+ }
593
+ #endif
594
+ cc()->removeNode(node);
595
+ }
596
+ node = next;
597
+ }
598
+
599
+ if (beforeFirst->next() == afterLast) {
600
+ block->setFirst(nullptr);
601
+ block->setLast(nullptr);
602
+ }
603
+ else {
604
+ block->setFirst(beforeFirst->next());
605
+ block->setLast(afterLast->prev());
606
+ }
607
+ }
608
+
609
+ return kErrorOk;
610
+ }
611
+
612
+ BaseNode* BaseRAPass::findSuccessorStartingAt(BaseNode* node) noexcept {
613
+ while (node && (node->isInformative() || node->hasNoEffect()))
614
+ node = node->next();
615
+ return node;
616
+ }
617
+
618
+ bool BaseRAPass::isNextTo(BaseNode* node, BaseNode* target) noexcept {
619
+ for (;;) {
620
+ node = node->next();
621
+ if (node == target)
622
+ return true;
623
+
624
+ if (!node)
625
+ return false;
626
+
627
+ if (node->isCode() || node->isData())
628
+ return false;
629
+ }
630
+ }
631
+
632
+ // BaseRAPass - Registers - VirtReg / WorkReg Mapping
633
+ // ==================================================
634
+
635
+ Error BaseRAPass::_asWorkReg(VirtReg* vReg, RAWorkReg** out) noexcept {
636
+ // Checked by `asWorkReg()` - must be true.
637
+ ASMJIT_ASSERT(vReg->_workReg == nullptr);
638
+
639
+ RegGroup group = vReg->group();
640
+ ASMJIT_ASSERT(group <= RegGroup::kMaxVirt);
641
+
642
+ RAWorkRegs& wRegs = workRegs();
643
+ RAWorkRegs& wRegsByGroup = workRegs(group);
644
+
645
+ ASMJIT_PROPAGATE(wRegs.willGrow(allocator()));
646
+ ASMJIT_PROPAGATE(wRegsByGroup.willGrow(allocator()));
647
+
648
+ RAWorkReg* wReg = zone()->newT<RAWorkReg>(vReg, wRegs.size());
649
+ if (ASMJIT_UNLIKELY(!wReg))
650
+ return DebugUtils::errored(kErrorOutOfMemory);
651
+
652
+ vReg->setWorkReg(wReg);
653
+ if (!vReg->isStack())
654
+ wReg->setRegByteMask(Support::lsbMask<uint64_t>(vReg->virtSize()));
655
+ wRegs.appendUnsafe(wReg);
656
+ wRegsByGroup.appendUnsafe(wReg);
657
+
658
+ // Only used by RA logging.
659
+ _maxWorkRegNameSize = Support::max(_maxWorkRegNameSize, vReg->nameSize());
660
+
661
+ *out = wReg;
662
+ return kErrorOk;
663
+ }
664
+
665
+ RAAssignment::WorkToPhysMap* BaseRAPass::newWorkToPhysMap() noexcept {
666
+ uint32_t count = workRegCount();
667
+ size_t size = WorkToPhysMap::sizeOf(count);
668
+
669
+ // If no registers are used it could be zero, in that case return a dummy
670
+ // map instead of NULL.
671
+ if (ASMJIT_UNLIKELY(!size)) {
672
+ static const RAAssignment::WorkToPhysMap nullMap = {{ 0 }};
673
+ return const_cast<RAAssignment::WorkToPhysMap*>(&nullMap);
674
+ }
675
+
676
+ WorkToPhysMap* map = zone()->allocT<WorkToPhysMap>(size);
677
+ if (ASMJIT_UNLIKELY(!map))
678
+ return nullptr;
679
+
680
+ map->reset(count);
681
+ return map;
682
+ }
683
+
684
+ RAAssignment::PhysToWorkMap* BaseRAPass::newPhysToWorkMap() noexcept {
685
+ uint32_t count = physRegTotal();
686
+ size_t size = PhysToWorkMap::sizeOf(count);
687
+
688
+ PhysToWorkMap* map = zone()->allocT<PhysToWorkMap>(size);
689
+ if (ASMJIT_UNLIKELY(!map))
690
+ return nullptr;
691
+
692
+ map->reset(count);
693
+ return map;
694
+ }
695
+
696
+ // BaseRAPass - Registers - Liveness Analysis and Statistics
697
+ // =========================================================
698
+
699
+ namespace LiveOps {
700
+ typedef ZoneBitVector::BitWord BitWord;
701
+
702
+ struct In {
703
+ static ASMJIT_FORCE_INLINE BitWord op(BitWord dst, BitWord out, BitWord gen, BitWord kill) noexcept {
704
+ DebugUtils::unused(dst);
705
+ return (out | gen) & ~kill;
706
+ }
707
+ };
708
+
709
+ template<typename Operator>
710
+ static ASMJIT_FORCE_INLINE bool op(BitWord* dst, const BitWord* a, uint32_t n) noexcept {
711
+ BitWord changed = 0;
712
+
713
+ for (uint32_t i = 0; i < n; i++) {
714
+ BitWord before = dst[i];
715
+ BitWord after = Operator::op(before, a[i]);
716
+
717
+ dst[i] = after;
718
+ changed |= (before ^ after);
719
+ }
720
+
721
+ return changed != 0;
722
+ }
723
+
724
+ template<typename Operator>
725
+ static ASMJIT_FORCE_INLINE bool op(BitWord* dst, const BitWord* a, const BitWord* b, uint32_t n) noexcept {
726
+ BitWord changed = 0;
727
+
728
+ for (uint32_t i = 0; i < n; i++) {
729
+ BitWord before = dst[i];
730
+ BitWord after = Operator::op(before, a[i], b[i]);
731
+
732
+ dst[i] = after;
733
+ changed |= (before ^ after);
734
+ }
735
+
736
+ return changed != 0;
737
+ }
738
+
739
+ template<typename Operator>
740
+ static ASMJIT_FORCE_INLINE bool op(BitWord* dst, const BitWord* a, const BitWord* b, const BitWord* c, uint32_t n) noexcept {
741
+ BitWord changed = 0;
742
+
743
+ for (uint32_t i = 0; i < n; i++) {
744
+ BitWord before = dst[i];
745
+ BitWord after = Operator::op(before, a[i], b[i], c[i]);
746
+
747
+ dst[i] = after;
748
+ changed |= (before ^ after);
749
+ }
750
+
751
+ return changed != 0;
752
+ }
753
+
754
+ static ASMJIT_FORCE_INLINE bool recalcInOut(RABlock* block, uint32_t numBitWords, bool initial = false) noexcept {
755
+ bool changed = initial;
756
+
757
+ const RABlocks& successors = block->successors();
758
+ uint32_t numSuccessors = successors.size();
759
+
760
+ // Calculate `OUT` based on `IN` of all successors.
761
+ for (uint32_t i = 0; i < numSuccessors; i++)
762
+ changed |= op<Support::Or>(block->liveOut().data(), successors[i]->liveIn().data(), numBitWords);
763
+
764
+ // Calculate `IN` based on `OUT`, `GEN`, and `KILL` bits.
765
+ if (changed)
766
+ changed = op<In>(block->liveIn().data(), block->liveOut().data(), block->gen().data(), block->kill().data(), numBitWords);
767
+
768
+ return changed;
769
+ }
770
+ }
771
+
772
+ ASMJIT_FAVOR_SPEED Error BaseRAPass::buildLiveness() noexcept {
773
+ #ifndef ASMJIT_NO_LOGGING
774
+ Logger* logger = getLoggerIf(DiagnosticOptions::kRADebugLiveness);
775
+ StringTmp<512> sb;
776
+ #endif
777
+
778
+ ASMJIT_RA_LOG_FORMAT("[BuildLiveness]\n");
779
+
780
+ uint32_t i;
781
+
782
+ uint32_t numAllBlocks = blockCount();
783
+ uint32_t numReachableBlocks = reachableBlockCount();
784
+
785
+ uint32_t numVisits = numReachableBlocks;
786
+ uint32_t numWorkRegs = workRegCount();
787
+ uint32_t numBitWords = ZoneBitVector::_wordsPerBits(numWorkRegs);
788
+
789
+ if (!numWorkRegs) {
790
+ ASMJIT_RA_LOG_FORMAT(" Done (no virtual registers)\n");
791
+ return kErrorOk;
792
+ }
793
+
794
+ ZoneVector<uint32_t> nUsesPerWorkReg; // Number of USEs of each RAWorkReg.
795
+ ZoneVector<uint32_t> nOutsPerWorkReg; // Number of OUTs of each RAWorkReg.
796
+ ZoneVector<uint32_t> nInstsPerBlock; // Number of instructions of each RABlock.
797
+
798
+ ASMJIT_PROPAGATE(nUsesPerWorkReg.resize(allocator(), numWorkRegs));
799
+ ASMJIT_PROPAGATE(nOutsPerWorkReg.resize(allocator(), numWorkRegs));
800
+ ASMJIT_PROPAGATE(nInstsPerBlock.resize(allocator(), numAllBlocks));
801
+
802
+ // Calculate GEN/KILL of Each Block
803
+ // --------------------------------
804
+
805
+ for (i = 0; i < numReachableBlocks; i++) {
806
+ RABlock* block = _pov[i];
807
+ ASMJIT_PROPAGATE(block->resizeLiveBits(numWorkRegs));
808
+
809
+ BaseNode* node = block->last();
810
+ BaseNode* stop = block->first();
811
+
812
+ uint32_t nInsts = 0;
813
+ for (;;) {
814
+ if (node->isInst()) {
815
+ InstNode* inst = node->as<InstNode>();
816
+ RAInst* raInst = inst->passData<RAInst>();
817
+ ASMJIT_ASSERT(raInst != nullptr);
818
+
819
+ RATiedReg* tiedRegs = raInst->tiedRegs();
820
+ uint32_t count = raInst->tiedCount();
821
+
822
+ for (uint32_t j = 0; j < count; j++) {
823
+ RATiedReg* tiedReg = &tiedRegs[j];
824
+ uint32_t workId = tiedReg->workId();
825
+
826
+ // Update `nUses` and `nOuts`.
827
+ nUsesPerWorkReg[workId] += 1u;
828
+ nOutsPerWorkReg[workId] += uint32_t(tiedReg->isWrite());
829
+
830
+ // Mark as:
831
+ // KILL - if this VirtReg is killed afterwards.
832
+ // LAST - if this VirtReg is last in this basic block.
833
+ if (block->kill().bitAt(workId))
834
+ tiedReg->addFlags(RATiedFlags::kKill);
835
+ else if (!block->gen().bitAt(workId))
836
+ tiedReg->addFlags(RATiedFlags::kLast);
837
+
838
+ if (tiedReg->isWriteOnly()) {
839
+ // KILL.
840
+ block->kill().setBit(workId, true);
841
+ }
842
+ else {
843
+ // GEN.
844
+ block->kill().setBit(workId, false);
845
+ block->gen().setBit(workId, true);
846
+ }
847
+
848
+ if (tiedReg->isLeadConsecutive()) {
849
+ RAWorkReg* workReg = workRegById(workId);
850
+ workReg->markLeadConsecutive();
851
+ }
852
+
853
+ if (tiedReg->hasConsecutiveParent()) {
854
+ RAWorkReg* consecutiveParentReg = workRegById(tiedReg->consecutiveParent());
855
+ consecutiveParentReg->addImmediateConsecutive(allocator(), workId);
856
+ }
857
+ }
858
+
859
+ nInsts++;
860
+ }
861
+
862
+ if (node == stop)
863
+ break;
864
+
865
+ node = node->prev();
866
+ ASMJIT_ASSERT(node != nullptr);
867
+ }
868
+
869
+ nInstsPerBlock[block->blockId()] = nInsts;
870
+ }
871
+
872
+ // Calculate IN/OUT of Each Block
873
+ // ------------------------------
874
+
875
+ {
876
+ ZoneStack<RABlock*> workList;
877
+ ZoneBitVector workBits;
878
+
879
+ ASMJIT_PROPAGATE(workList.init(allocator()));
880
+ ASMJIT_PROPAGATE(workBits.resize(allocator(), blockCount(), true));
881
+
882
+ for (i = 0; i < numReachableBlocks; i++) {
883
+ RABlock* block = _pov[i];
884
+ LiveOps::recalcInOut(block, numBitWords, true);
885
+ ASMJIT_PROPAGATE(workList.append(block));
886
+ }
887
+
888
+ while (!workList.empty()) {
889
+ RABlock* block = workList.popFirst();
890
+ uint32_t blockId = block->blockId();
891
+
892
+ workBits.setBit(blockId, false);
893
+ if (LiveOps::recalcInOut(block, numBitWords)) {
894
+ const RABlocks& predecessors = block->predecessors();
895
+ uint32_t numPredecessors = predecessors.size();
896
+
897
+ for (uint32_t j = 0; j < numPredecessors; j++) {
898
+ RABlock* pred = predecessors[j];
899
+ if (!workBits.bitAt(pred->blockId())) {
900
+ workBits.setBit(pred->blockId(), true);
901
+ ASMJIT_PROPAGATE(workList.append(pred));
902
+ }
903
+ }
904
+ }
905
+ numVisits++;
906
+ }
907
+
908
+ workList.reset();
909
+ workBits.release(allocator());
910
+ }
911
+
912
+ ASMJIT_RA_LOG_COMPLEX({
913
+ logger->logf(" LiveIn/Out Done (%u visits)\n", numVisits);
914
+ for (i = 0; i < numAllBlocks; i++) {
915
+ RABlock* block = _blocks[i];
916
+
917
+ ASMJIT_PROPAGATE(sb.assignFormat(" {#%u}\n", block->blockId()));
918
+ ASMJIT_PROPAGATE(_dumpBlockLiveness(sb, block));
919
+
920
+ logger->log(sb);
921
+ }
922
+ });
923
+
924
+ // Reserve the space in each `RAWorkReg` for references
925
+ // ----------------------------------------------------
926
+
927
+ for (i = 0; i < numWorkRegs; i++) {
928
+ RAWorkReg* workReg = workRegById(i);
929
+ ASMJIT_PROPAGATE(workReg->_refs.reserve(allocator(), nUsesPerWorkReg[i]));
930
+ ASMJIT_PROPAGATE(workReg->_writes.reserve(allocator(), nOutsPerWorkReg[i]));
931
+ }
932
+
933
+ // Assign block and instruction positions, build LiveCount and LiveSpans
934
+ // ---------------------------------------------------------------------
935
+
936
+ uint32_t position = 2;
937
+ for (i = 0; i < numAllBlocks; i++) {
938
+ RABlock* block = _blocks[i];
939
+ if (!block->isReachable())
940
+ continue;
941
+
942
+ BaseNode* node = block->first();
943
+ BaseNode* stop = block->last();
944
+
945
+ uint32_t endPosition = position + nInstsPerBlock[i] * 2;
946
+ block->setFirstPosition(position);
947
+ block->setEndPosition(endPosition);
948
+
949
+ RALiveCount curLiveCount;
950
+ RALiveCount maxLiveCount;
951
+
952
+ // Process LIVE-IN.
953
+ ZoneBitVector::ForEachBitSet it(block->liveIn());
954
+ while (it.hasNext()) {
955
+ RAWorkReg* workReg = _workRegs[uint32_t(it.next())];
956
+ curLiveCount[workReg->group()]++;
957
+ ASMJIT_PROPAGATE(workReg->liveSpans().openAt(allocator(), position, endPosition));
958
+ }
959
+
960
+ for (;;) {
961
+ if (node->isInst()) {
962
+ InstNode* inst = node->as<InstNode>();
963
+ RAInst* raInst = inst->passData<RAInst>();
964
+ ASMJIT_ASSERT(raInst != nullptr);
965
+
966
+ RATiedReg* tiedRegs = raInst->tiedRegs();
967
+ uint32_t count = raInst->tiedCount();
968
+
969
+ inst->setPosition(position);
970
+ raInst->_liveCount = curLiveCount;
971
+
972
+ for (uint32_t j = 0; j < count; j++) {
973
+ RATiedReg* tiedReg = &tiedRegs[j];
974
+ uint32_t workId = tiedReg->workId();
975
+
976
+ // Create refs and writes.
977
+ RAWorkReg* workReg = workRegById(workId);
978
+ workReg->_refs.appendUnsafe(node);
979
+ if (tiedReg->isWrite())
980
+ workReg->_writes.appendUnsafe(node);
981
+
982
+ // We couldn't calculate this in previous steps, but since we know all LIVE-OUT at this point it becomes
983
+ // trivial. If this is the last instruction that uses this `workReg` and it's not LIVE-OUT then it is
984
+ // KILLed here.
985
+ if (tiedReg->isLast() && !block->liveOut().bitAt(workId))
986
+ tiedReg->addFlags(RATiedFlags::kKill);
987
+
988
+ LiveRegSpans& liveSpans = workReg->liveSpans();
989
+ bool wasOpen;
990
+ ASMJIT_PROPAGATE(liveSpans.openAt(allocator(), position + !tiedReg->isRead(), endPosition, wasOpen));
991
+
992
+ RegGroup group = workReg->group();
993
+ if (!wasOpen) {
994
+ curLiveCount[group]++;
995
+ raInst->_liveCount[group]++;
996
+ }
997
+
998
+ if (tiedReg->isKill()) {
999
+ liveSpans.closeAt(position + !tiedReg->isRead() + 1);
1000
+ curLiveCount[group]--;
1001
+ }
1002
+
1003
+ // Update `RAWorkReg::useIdMask` and `RAWorkReg::hintRegId`.
1004
+ if (tiedReg->hasUseId()) {
1005
+ uint32_t useId = tiedReg->useId();
1006
+ workReg->addUseIdMask(Support::bitMask(useId));
1007
+ if (!workReg->hasHintRegId() && !Support::bitTest(raInst->_clobberedRegs[group], useId))
1008
+ workReg->setHintRegId(useId);
1009
+ }
1010
+
1011
+ if (tiedReg->useRegMask()) {
1012
+ workReg->restrictPreferredMask(tiedReg->useRegMask());
1013
+ if (workReg->isLeadConsecutive())
1014
+ workReg->restrictConsecutiveMask(tiedReg->useRegMask());
1015
+ }
1016
+
1017
+ if (tiedReg->outRegMask()) {
1018
+ workReg->restrictPreferredMask(tiedReg->outRegMask());
1019
+ if (workReg->isLeadConsecutive())
1020
+ workReg->restrictConsecutiveMask(tiedReg->outRegMask());
1021
+ }
1022
+
1023
+ // Update `RAWorkReg::clobberedSurvivalMask`.
1024
+ if (raInst->_clobberedRegs[group] && !tiedReg->isOutOrKill()) {
1025
+ workReg->addClobberSurvivalMask(raInst->_clobberedRegs[group]);
1026
+ }
1027
+ }
1028
+
1029
+ position += 2;
1030
+ maxLiveCount.op<Support::Max>(raInst->_liveCount);
1031
+ }
1032
+
1033
+ if (node == stop)
1034
+ break;
1035
+
1036
+ node = node->next();
1037
+ ASMJIT_ASSERT(node != nullptr);
1038
+ }
1039
+
1040
+ block->_maxLiveCount = maxLiveCount;
1041
+ _globalMaxLiveCount.op<Support::Max>(maxLiveCount);
1042
+ ASMJIT_ASSERT(position == block->endPosition());
1043
+ }
1044
+
1045
+ // Calculate WorkReg statistics
1046
+ // ----------------------------
1047
+
1048
+ for (i = 0; i < numWorkRegs; i++) {
1049
+ RAWorkReg* workReg = _workRegs[i];
1050
+
1051
+ LiveRegSpans& spans = workReg->liveSpans();
1052
+ uint32_t width = spans.width();
1053
+ float freq = width ? float(double(workReg->_refs.size()) / double(width)) : float(0);
1054
+
1055
+ RALiveStats& stats = workReg->liveStats();
1056
+ stats._width = width;
1057
+ stats._freq = freq;
1058
+ stats._priority = freq + float(int(workReg->virtReg()->weight())) * 0.01f;
1059
+ }
1060
+
1061
+ ASMJIT_RA_LOG_COMPLEX({
1062
+ sb.clear();
1063
+ _dumpLiveSpans(sb);
1064
+ logger->log(sb);
1065
+ });
1066
+
1067
+ nUsesPerWorkReg.release(allocator());
1068
+ nOutsPerWorkReg.release(allocator());
1069
+ nInstsPerBlock.release(allocator());
1070
+
1071
+ return kErrorOk;
1072
+ }
1073
+
1074
+ Error BaseRAPass::assignArgIndexToWorkRegs() noexcept {
1075
+ ZoneBitVector& liveIn = entryBlock()->liveIn();
1076
+ uint32_t argCount = func()->argCount();
1077
+
1078
+ for (uint32_t argIndex = 0; argIndex < argCount; argIndex++) {
1079
+ for (uint32_t valueIndex = 0; valueIndex < Globals::kMaxValuePack; valueIndex++) {
1080
+ // Unassigned argument.
1081
+ const RegOnly& regArg = func()->argPack(argIndex)[valueIndex];
1082
+ if (!regArg.isReg() || !cc()->isVirtIdValid(regArg.id()))
1083
+ continue;
1084
+
1085
+ VirtReg* virtReg = cc()->virtRegById(regArg.id());
1086
+ if (!virtReg)
1087
+ continue;
1088
+
1089
+ // Unreferenced argument.
1090
+ RAWorkReg* workReg = virtReg->workReg();
1091
+ if (!workReg)
1092
+ continue;
1093
+
1094
+ // Overwritten argument.
1095
+ uint32_t workId = workReg->workId();
1096
+ if (!liveIn.bitAt(workId))
1097
+ continue;
1098
+
1099
+ workReg->setArgIndex(argIndex, valueIndex);
1100
+ const FuncValue& arg = func()->detail().arg(argIndex, valueIndex);
1101
+
1102
+ if (arg.isReg() && _archTraits->regTypeToGroup(arg.regType()) == workReg->group()) {
1103
+ workReg->setHintRegId(arg.regId());
1104
+ }
1105
+ }
1106
+ }
1107
+
1108
+ return kErrorOk;
1109
+ }
1110
+
1111
+ // BaseRAPass - Allocation - Global
1112
+ // ================================
1113
+
1114
+ #ifndef ASMJIT_NO_LOGGING
1115
+ static void RAPass_dumpSpans(String& sb, uint32_t index, const LiveRegSpans& liveSpans) noexcept {
1116
+ sb.appendFormat(" %02u: ", index);
1117
+
1118
+ for (uint32_t i = 0; i < liveSpans.size(); i++) {
1119
+ const LiveRegSpan& liveSpan = liveSpans[i];
1120
+ if (i) sb.append(", ");
1121
+ sb.appendFormat("[%u:%u@%u]", liveSpan.a, liveSpan.b, liveSpan.id);
1122
+ }
1123
+
1124
+ sb.append('\n');
1125
+ }
1126
+ #endif
1127
+
1128
+ Error BaseRAPass::runGlobalAllocator() noexcept {
1129
+ ASMJIT_PROPAGATE(initGlobalLiveSpans());
1130
+
1131
+ for (RegGroup group : RegGroupVirtValues{}) {
1132
+ ASMJIT_PROPAGATE(binPack(group));
1133
+ }
1134
+
1135
+ return kErrorOk;
1136
+ }
1137
+
1138
+ ASMJIT_FAVOR_SPEED Error BaseRAPass::initGlobalLiveSpans() noexcept {
1139
+ for (RegGroup group : RegGroupVirtValues{}) {
1140
+ size_t physCount = _physRegCount[group];
1141
+ LiveRegSpans* liveSpans = nullptr;
1142
+
1143
+ if (physCount) {
1144
+ liveSpans = allocator()->allocT<LiveRegSpans>(physCount * sizeof(LiveRegSpans));
1145
+ if (ASMJIT_UNLIKELY(!liveSpans))
1146
+ return DebugUtils::errored(kErrorOutOfMemory);
1147
+
1148
+ for (size_t physId = 0; physId < physCount; physId++)
1149
+ new(&liveSpans[physId]) LiveRegSpans();
1150
+ }
1151
+
1152
+ _globalLiveSpans[group] = liveSpans;
1153
+ }
1154
+
1155
+ return kErrorOk;
1156
+ }
1157
+
1158
+ struct RAConsecutiveReg {
1159
+ RAWorkReg* workReg;
1160
+ RAWorkReg* parentReg;
1161
+ };
1162
+
1163
+ ASMJIT_FAVOR_SPEED Error BaseRAPass::binPack(RegGroup group) noexcept {
1164
+ if (workRegCount(group) == 0)
1165
+ return kErrorOk;
1166
+
1167
+ #ifndef ASMJIT_NO_LOGGING
1168
+ Logger* logger = getLoggerIf(DiagnosticOptions::kRADebugAssignment);
1169
+ StringTmp<512> sb;
1170
+
1171
+ ASMJIT_RA_LOG_FORMAT("[BinPack] Available=%u (0x%08X) Count=%u RegGroup=%u\n",
1172
+ Support::popcnt(_availableRegs[group]),
1173
+ _availableRegs[group],
1174
+ workRegCount(group),
1175
+ uint32_t(group));
1176
+ #endif
1177
+
1178
+ uint32_t i;
1179
+ uint32_t physCount = _physRegCount[group];
1180
+
1181
+ RAWorkRegs workRegs;
1182
+ ZoneVector<RAConsecutiveReg> consecutiveRegs;
1183
+ LiveRegSpans tmpSpans;
1184
+
1185
+ ASMJIT_PROPAGATE(workRegs.concat(allocator(), this->workRegs(group)));
1186
+ workRegs.sort([](const RAWorkReg* a, const RAWorkReg* b) noexcept {
1187
+ return b->liveStats().priority() - a->liveStats().priority();
1188
+ });
1189
+
1190
+ uint32_t numWorkRegs = workRegs.size();
1191
+ RegMask availableRegs = _availableRegs[group];
1192
+
1193
+ // First try to pack everything that provides register-id hint as these are most likely function arguments and fixed
1194
+ // (precolored) virtual registers.
1195
+ if (!workRegs.empty()) {
1196
+ uint32_t dstIndex = 0;
1197
+
1198
+ for (i = 0; i < numWorkRegs; i++) {
1199
+ RAWorkReg* workReg = workRegs[i];
1200
+
1201
+ if (workReg->isLeadConsecutive()) {
1202
+ ASMJIT_PROPAGATE(consecutiveRegs.append(allocator(), RAConsecutiveReg{workReg, nullptr}));
1203
+ workReg->markProcessedConsecutive();
1204
+ }
1205
+
1206
+ if (workReg->hasHintRegId()) {
1207
+ uint32_t physId = workReg->hintRegId();
1208
+ if (Support::bitTest(availableRegs, physId)) {
1209
+ LiveRegSpans& live = _globalLiveSpans[group][physId];
1210
+ Error err = tmpSpans.nonOverlappingUnionOf(allocator(), live, workReg->liveSpans(), LiveRegData(workReg->virtId()));
1211
+
1212
+ if (err == kErrorOk) {
1213
+ live.swap(tmpSpans);
1214
+ workReg->setHomeRegId(physId);
1215
+ workReg->markAllocated();
1216
+ continue;
1217
+ }
1218
+
1219
+ if (err != 0xFFFFFFFFu)
1220
+ return err;
1221
+ }
1222
+ }
1223
+
1224
+ workRegs[dstIndex++] = workReg;
1225
+ }
1226
+
1227
+ workRegs._setSize(dstIndex);
1228
+ numWorkRegs = dstIndex;
1229
+ }
1230
+
1231
+ // Allocate consecutive registers - both leads and all consecutives. This is important and prioritized over the rest,
1232
+ // because once a lead is allocated we really need to allocate its consecutives, otherwise we may bin pack other
1233
+ // registers into their places, which would result in wrong hints to the local allocator, and then into many moves
1234
+ // or spills.
1235
+ if (!consecutiveRegs.empty()) {
1236
+ // This loop appends all other consecutive registers into `consecutiveRegs` array. Leads are at the beginning,
1237
+ // non-leads follow.
1238
+ i = 0;
1239
+ for (;;) {
1240
+ uint32_t stop = consecutiveRegs.size();
1241
+ if (i == stop)
1242
+ break;
1243
+
1244
+ while (i < stop) {
1245
+ RAWorkReg* workReg = consecutiveRegs[i].workReg;
1246
+ if (workReg->hasImmediateConsecutives()) {
1247
+ ZoneBitVector::ForEachBitSet it(workReg->immediateConsecutives());
1248
+ while (it.hasNext()) {
1249
+ uint32_t consecutiveWorkId = uint32_t(it.next());
1250
+ RAWorkReg* consecutiveReg = workRegById(consecutiveWorkId);
1251
+ if (!consecutiveReg->isProcessedConsecutive()) {
1252
+ ASMJIT_PROPAGATE(consecutiveRegs.append(allocator(), RAConsecutiveReg{consecutiveReg, workReg}));
1253
+ consecutiveReg->markProcessedConsecutive();
1254
+ }
1255
+ }
1256
+ }
1257
+ i++;
1258
+ }
1259
+ }
1260
+
1261
+ uint32_t numConsecutiveRegs = consecutiveRegs.size();
1262
+ for (i = 0; i < numConsecutiveRegs; i++) {
1263
+ RAWorkReg* workReg = consecutiveRegs[i].workReg;
1264
+ if (workReg->isAllocated())
1265
+ continue;
1266
+
1267
+ RAWorkReg* parentReg = consecutiveRegs[i].parentReg;
1268
+ RegMask physRegs = 0;
1269
+
1270
+ if (!parentReg) {
1271
+ physRegs = availableRegs & workReg->preferredMask();
1272
+ if (!physRegs) {
1273
+ physRegs = availableRegs & workReg->consecutiveMask();
1274
+
1275
+ // NOTE: This should never be true as it would mean we would never allocate this virtual register
1276
+ // (not here, and not later when local register allocator processes RATiedReg sets).
1277
+ if (ASMJIT_UNLIKELY(!physRegs))
1278
+ return DebugUtils::errored(kErrorConsecutiveRegsAllocation);
1279
+ }
1280
+ }
1281
+ else if (parentReg->hasHomeRegId()) {
1282
+ uint32_t consecutiveId = parentReg->homeRegId() + 1;
1283
+
1284
+ // NOTE: We don't support wrapping. If this goes beyond all allocable registers there is something wrong.
1285
+ if (consecutiveId > 31 || !Support::bitTest(availableRegs, consecutiveId))
1286
+ return DebugUtils::errored(kErrorConsecutiveRegsAllocation);
1287
+
1288
+ workReg->setHintRegId(consecutiveId);
1289
+ physRegs = Support::bitMask(consecutiveId);
1290
+ }
1291
+
1292
+ while (physRegs) {
1293
+ uint32_t physId = Support::bitSizeOf<RegMask>() - 1 - Support::clz(physRegs);
1294
+
1295
+ LiveRegSpans& live = _globalLiveSpans[group][physId];
1296
+ Error err = tmpSpans.nonOverlappingUnionOf(allocator(), live, workReg->liveSpans(), LiveRegData(workReg->virtId()));
1297
+
1298
+ if (err == kErrorOk) {
1299
+ workReg->setHomeRegId(physId);
1300
+ workReg->markAllocated();
1301
+ live.swap(tmpSpans);
1302
+ break;
1303
+ }
1304
+
1305
+ if (ASMJIT_UNLIKELY(err != 0xFFFFFFFFu))
1306
+ return err;
1307
+
1308
+ physRegs ^= Support::bitMask(physId);
1309
+ }
1310
+ }
1311
+ }
1312
+
+ // Try to pack the rest.
+ if (!workRegs.empty()) {
+ uint32_t dstIndex = 0;
+
+ for (i = 0; i < numWorkRegs; i++) {
+ RAWorkReg* workReg = workRegs[i];
+
+ if (workReg->isAllocated())
+ continue;
+
+ RegMask physRegs = availableRegs;
+ if (physRegs & workReg->preferredMask())
+ physRegs &= workReg->preferredMask();
+
+ while (physRegs) {
+ RegMask preferredMask = physRegs;
+ uint32_t physId = Support::ctz(preferredMask);
+
+ if (workReg->clobberSurvivalMask()) {
+ preferredMask &= workReg->clobberSurvivalMask();
+ if (preferredMask)
+ physId = Support::ctz(preferredMask);
+ }
+
+ LiveRegSpans& live = _globalLiveSpans[group][physId];
+ Error err = tmpSpans.nonOverlappingUnionOf(allocator(), live, workReg->liveSpans(), LiveRegData(workReg->virtId()));
+
+ if (err == kErrorOk) {
+ workReg->setHomeRegId(physId);
+ workReg->markAllocated();
+ live.swap(tmpSpans);
+ break;
+ }
+
+ if (ASMJIT_UNLIKELY(err != 0xFFFFFFFFu))
+ return err;
+
+ physRegs ^= Support::bitMask(physId);
+ }
+
+ // Keep it in `workRegs` if it was not allocated.
+ if (!physRegs)
+ workRegs[dstIndex++] = workReg;
+ }
+
+ workRegs._setSize(dstIndex);
+ numWorkRegs = dstIndex;
+ }
+
+ ASMJIT_RA_LOG_COMPLEX({
+ for (uint32_t physId = 0; physId < physCount; physId++) {
+ LiveRegSpans& live = _globalLiveSpans[group][physId];
+ if (live.empty())
+ continue;
+
+ sb.clear();
+ RAPass_dumpSpans(sb, physId, live);
+ logger->log(sb);
+ }
+ });
+
+ // Maybe unused if logging is disabled.
+ DebugUtils::unused(physCount);
+
+ if (workRegs.empty()) {
+ ASMJIT_RA_LOG_FORMAT(" Completed.\n");
+ }
+ else {
+ _strategy[group].setType(RAStrategyType::kComplex);
+ for (RAWorkReg* workReg : workRegs)
+ workReg->markStackPreferred();
+
+ ASMJIT_RA_LOG_COMPLEX({
+ uint32_t count = workRegs.size();
+ sb.clear();
+ sb.appendFormat(" Unassigned (%u): ", count);
+ for (i = 0; i < numWorkRegs; i++) {
+ RAWorkReg* workReg = workRegs[i];
+ if (i) sb.append(", ");
+ sb.append(workReg->name());
+ }
+ sb.append('\n');
+ logger->log(sb);
+ });
+ }
+
+ return kErrorOk;
+ }
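The packing loops above rely on LiveRegSpans::nonOverlappingUnionOf to decide whether a work register's live spans can share a physical register with everything already bound to it; a result of 0xFFFFFFFFu is the sentinel for "spans overlap, try the next physical id". As an illustrative aside, not asmjit's implementation, here is a minimal sketch of that underlying idea using hypothetical Span/nonOverlappingUnion names; only the a/b span fields mirror the real LiveRegSpan:

    #include <cstdint>
    #include <vector>

    struct Span { uint32_t a, b; };  // half-open live range [a, b)

    // Merges two sorted span lists into `out` when no pair of spans overlaps;
    // returns false on the first overlap (the register cannot be shared).
    static bool nonOverlappingUnion(const std::vector<Span>& lhs,
                                    const std::vector<Span>& rhs,
                                    std::vector<Span>& out) {
      size_t i = 0, j = 0;
      out.clear();
      while (i < lhs.size() && j < rhs.size()) {
        const Span& x = lhs[i];
        const Span& y = rhs[j];
        if (x.b <= y.a) { out.push_back(x); i++; }       // x ends before y starts
        else if (y.b <= x.a) { out.push_back(y); j++; }  // y ends before x starts
        else return false;                               // overlap -> cannot share
      }
      while (i < lhs.size()) out.push_back(lhs[i++]);
      while (j < rhs.size()) out.push_back(rhs[j++]);
      return true;
    }

On failure the allocator simply clears the tried bit from `physRegs` and moves on; registers that cannot be packed anywhere stay in `workRegs` and are later marked stack-preferred under the complex strategy.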
+
+ // BaseRAPass - Allocation - Local
+ // ===============================
+
+ Error BaseRAPass::runLocalAllocator() noexcept {
+ RALocalAllocator lra(this);
+ ASMJIT_PROPAGATE(lra.init());
+
+ if (!blockCount())
+ return kErrorOk;
+
+ // The allocation is done when this reaches zero.
+ uint32_t blocksRemaining = reachableBlockCount();
+
+ // Current block.
+ uint32_t blockId = 0;
+ RABlock* block = _blocks[blockId];
+
+ // The first block (entry) must always be reachable.
+ ASMJIT_ASSERT(block->isReachable());
+
+ // Assign function arguments for the initial block. The `lra` is valid now.
+ lra.makeInitialAssignment();
+ ASMJIT_PROPAGATE(setBlockEntryAssignment(block, block, lra._curAssignment));
+
+ // The loop starts from the first block and iterates blocks in order; however, the algorithm also allows jumping to
+ // any other block when finished if it's a jump target. In-order iteration just makes sure that all blocks are visited.
+ for (;;) {
+ BaseNode* first = block->first();
+ BaseNode* last = block->last();
+ BaseNode* terminator = block->hasTerminator() ? last : nullptr;
+
+ BaseNode* beforeFirst = first->prev();
+ BaseNode* afterLast = last->next();
+
+ bool unconditionalJump = false;
+ RABlock* consecutive = nullptr;
+
+ if (block->hasSuccessors())
+ consecutive = block->successors()[0];
+
+ lra.setBlock(block);
+ block->makeAllocated();
+
+ BaseNode* node = first;
+ while (node != afterLast) {
+ BaseNode* next = node->next();
+ if (node->isInst()) {
+ InstNode* inst = node->as<InstNode>();
+
+ if (ASMJIT_UNLIKELY(inst == terminator)) {
+ const RABlocks& successors = block->successors();
+ if (block->hasConsecutive()) {
+ ASMJIT_PROPAGATE(lra.allocBranch(inst, successors.last(), successors.first()));
+
+ node = next;
+ continue;
+ }
+ else if (successors.size() > 1) {
+ RABlock* cont = block->hasConsecutive() ? successors.first() : nullptr;
+ ASMJIT_PROPAGATE(lra.allocJumpTable(inst, successors, cont));
+
+ node = next;
+ continue;
+ }
+ else {
+ // Otherwise this is an unconditional jump; special handling isn't required.
+ unconditionalJump = true;
+ }
+ }
+
+ ASMJIT_PROPAGATE(lra.allocInst(inst));
+ if (inst->type() == NodeType::kInvoke)
+ ASMJIT_PROPAGATE(emitPreCall(inst->as<InvokeNode>()));
+ else
+ ASMJIT_PROPAGATE(lra.spillAfterAllocation(inst));
+ }
+ node = next;
+ }
+
+ if (consecutive) {
+ BaseNode* prev = afterLast ? afterLast->prev() : cc()->lastNode();
+ cc()->_setCursor(unconditionalJump ? prev->prev() : prev);
+
+ if (consecutive->hasEntryAssignment()) {
+ ASMJIT_PROPAGATE(lra.switchToAssignment(consecutive->entryPhysToWorkMap(), consecutive->liveIn(), consecutive->isAllocated(), false));
+ }
+ else {
+ ASMJIT_PROPAGATE(lra.spillRegsBeforeEntry(consecutive));
+ ASMJIT_PROPAGATE(setBlockEntryAssignment(consecutive, block, lra._curAssignment));
+ lra._curAssignment.copyFrom(consecutive->entryPhysToWorkMap());
+ }
+ }
+
+ // Important as the local allocator can insert instructions before
+ // and after any instruction within the basic block.
+ block->setFirst(beforeFirst->next());
+ block->setLast(afterLast ? afterLast->prev() : cc()->lastNode());
+
+ if (--blocksRemaining == 0)
+ break;
+
+ // Switch to the next consecutive block, if any.
+ if (consecutive) {
+ block = consecutive;
+ if (!block->isAllocated())
+ continue;
+ }
+
+ // Get the next block.
+ for (;;) {
+ if (++blockId >= blockCount())
+ blockId = 0;
+
+ block = _blocks[blockId];
+ if (!block->isReachable() || block->isAllocated() || !block->hasEntryAssignment())
+ continue;
+
+ break;
+ }
+
+ // If we switched to some block we have to update the local allocator.
+ lra.replaceAssignment(block->entryPhysToWorkMap());
+ }
+
+ _clobberedRegs.op<Support::Or>(lra._clobberedRegs);
+ return kErrorOk;
+ }
+
+ Error BaseRAPass::setBlockEntryAssignment(RABlock* block, const RABlock* fromBlock, const RAAssignment& fromAssignment) noexcept {
+ if (block->hasSharedAssignmentId()) {
+ uint32_t sharedAssignmentId = block->sharedAssignmentId();
+
+ // Shouldn't happen. Entry assignment of a block that has a shared-state will assign to all blocks
+ // with the same sharedAssignmentId. It's a bug if the shared state has already been assigned.
+ if (!_sharedAssignments[sharedAssignmentId].empty())
+ return DebugUtils::errored(kErrorInvalidState);
+
+ return setSharedAssignment(sharedAssignmentId, fromAssignment);
+ }
+
+ PhysToWorkMap* physToWorkMap = clonePhysToWorkMap(fromAssignment.physToWorkMap());
+ if (ASMJIT_UNLIKELY(!physToWorkMap))
+ return DebugUtils::errored(kErrorOutOfMemory);
+
+ block->setEntryAssignment(physToWorkMap);
+
+ // If this is the first (entry) block there is nothing to do.
+ if (block == fromBlock) {
+ // Entry block should never have a shared state.
+ if (block->hasSharedAssignmentId())
+ return DebugUtils::errored(kErrorInvalidState);
+
+ return kErrorOk;
+ }
+
+ const ZoneBitVector& liveOut = fromBlock->liveOut();
+ const ZoneBitVector& liveIn = block->liveIn();
+
+ // It's possible that `fromBlock` has LIVE-OUT regs that `block` doesn't
+ // have in LIVE-IN; these have to be unassigned.
+ {
+ ZoneBitVector::ForEachBitOp<Support::AndNot> it(liveOut, liveIn);
+ while (it.hasNext()) {
+ uint32_t workId = uint32_t(it.next());
+ RAWorkReg* workReg = workRegById(workId);
+
+ RegGroup group = workReg->group();
+ uint32_t physId = fromAssignment.workToPhysId(group, workId);
+
+ if (physId != RAAssignment::kPhysNone)
+ physToWorkMap->unassign(group, physId, _physRegIndex.get(group) + physId);
+ }
+ }
+
+ return blockEntryAssigned(physToWorkMap);
+ }
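The ForEachBitOp<Support::AndNot> walk above visits exactly the work registers that are live-out of `fromBlock` but not live-in to `block`, and those get unassigned from the cloned entry map. A minimal sketch of the same idea on a plain bit word, with hypothetical names rather than the ZoneBitVector API (the ctz intrinsic below is the GCC/Clang builtin):

    #include <cstdint>

    // Calls `visit` for every bit set in `liveOut` but clear in `liveIn`,
    // i.e. for every register that must be unassigned before entering the block.
    static void forEachAndNot(uint64_t liveOut, uint64_t liveIn, void (*visit)(uint32_t)) {
      uint64_t bits = liveOut & ~liveIn;
      while (bits) {
        uint32_t index = uint32_t(__builtin_ctzll(bits)); // index of the lowest set bit
        visit(index);
        bits &= bits - 1;                                 // clear it and continue
      }
    }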
+
+ Error BaseRAPass::setSharedAssignment(uint32_t sharedAssignmentId, const RAAssignment& fromAssignment) noexcept {
+ ASMJIT_ASSERT(_sharedAssignments[sharedAssignmentId].empty());
+
+ PhysToWorkMap* physToWorkMap = clonePhysToWorkMap(fromAssignment.physToWorkMap());
+ if (ASMJIT_UNLIKELY(!physToWorkMap))
+ return DebugUtils::errored(kErrorOutOfMemory);
+
+ _sharedAssignments[sharedAssignmentId].assignPhysToWorkMap(physToWorkMap);
+
+ ZoneBitVector& sharedLiveIn = _sharedAssignments[sharedAssignmentId]._liveIn;
+ ASMJIT_PROPAGATE(sharedLiveIn.resize(allocator(), workRegCount()));
+
+ Support::Array<uint32_t, Globals::kNumVirtGroups> sharedAssigned {};
+ for (RABlock* block : blocks()) {
+ if (block->sharedAssignmentId() == sharedAssignmentId) {
+ ASMJIT_ASSERT(!block->hasEntryAssignment());
+
+ PhysToWorkMap* entryPhysToWorkMap = clonePhysToWorkMap(fromAssignment.physToWorkMap());
+ if (ASMJIT_UNLIKELY(!entryPhysToWorkMap))
+ return DebugUtils::errored(kErrorOutOfMemory);
+
+ block->setEntryAssignment(entryPhysToWorkMap);
+
+ const ZoneBitVector& liveIn = block->liveIn();
+ sharedLiveIn.or_(liveIn);
+
+ for (RegGroup group : RegGroupVirtValues{}) {
+ sharedAssigned[group] |= entryPhysToWorkMap->assigned[group];
+
+ uint32_t physBaseIndex = _physRegIndex.get(group);
+ Support::BitWordIterator<RegMask> it(entryPhysToWorkMap->assigned[group]);
+
+ while (it.hasNext()) {
+ uint32_t physId = it.next();
+ uint32_t workId = entryPhysToWorkMap->workIds[physBaseIndex + physId];
+
+ if (!liveIn.bitAt(workId))
+ entryPhysToWorkMap->unassign(group, physId, physBaseIndex + physId);
+ }
+ }
+ }
+ }
+
+ for (RegGroup group : RegGroupVirtValues{}) {
+ uint32_t physBaseIndex = _physRegIndex.get(group);
+ Support::BitWordIterator<RegMask> it(_availableRegs[group] & ~sharedAssigned[group]);
+
+ while (it.hasNext()) {
+ uint32_t physId = it.next();
+ if (Support::bitTest(physToWorkMap->assigned[group], physId))
+ physToWorkMap->unassign(group, physId, physBaseIndex + physId);
+ }
+ }
+
+ return blockEntryAssigned(physToWorkMap);
+ }
+
+ Error BaseRAPass::blockEntryAssigned(const PhysToWorkMap* physToWorkMap) noexcept {
+ // Complex allocation strategy requires recording register assignments upon block entry (or per shared state).
+ for (RegGroup group : RegGroupVirtValues{}) {
+ if (!_strategy[group].isComplex())
+ continue;
+
+ uint32_t physBaseIndex = _physRegIndex[group];
+ Support::BitWordIterator<RegMask> it(physToWorkMap->assigned[group]);
+
+ while (it.hasNext()) {
+ uint32_t physId = it.next();
+ uint32_t workId = physToWorkMap->workIds[physBaseIndex + physId];
+
+ RAWorkReg* workReg = workRegById(workId);
+ workReg->addAllocatedMask(Support::bitMask(physId));
+ }
+ }
+
+ return kErrorOk;
+ }
+
+ // BaseRAPass - Allocation - Utilities
+ // ===================================
+
+ Error BaseRAPass::useTemporaryMem(BaseMem& out, uint32_t size, uint32_t alignment) noexcept {
+ ASMJIT_ASSERT(alignment <= 64);
+
+ if (_temporaryMem.isNone()) {
+ ASMJIT_PROPAGATE(cc()->_newStack(&_temporaryMem.as<BaseMem>(), size, alignment));
+ }
+ else {
+ ASMJIT_ASSERT(_temporaryMem.as<BaseMem>().isRegHome());
+
+ uint32_t virtId = _temporaryMem.as<BaseMem>().baseId();
+ VirtReg* virtReg = cc()->virtRegById(virtId);
+
+ cc()->setStackSize(virtId, Support::max(virtReg->virtSize(), size),
+ Support::max(virtReg->alignment(), alignment));
+ }
+
+ out = _temporaryMem.as<BaseMem>();
+ return kErrorOk;
+ }
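useTemporaryMem lazily creates a single shared scratch stack slot and afterwards only grows it, so every caller ends up with a slot that is at least as large and as aligned as the largest request so far (the real pass does this by resizing the backing virtual stack register through cc()->setStackSize(), as shown above). A standalone sketch of that policy, with hypothetical names not taken from asmjit:

    #include <algorithm>
    #include <cstdint>

    // One shared scratch slot whose size/alignment is the running maximum
    // of every request made against it.
    struct ScratchSlot {
      uint32_t size = 0;
      uint32_t alignment = 1;

      // Grows the slot as needed and returns it; an existing slot is reused.
      ScratchSlot& request(uint32_t reqSize, uint32_t reqAlignment) {
        size = std::max(size, reqSize);
        alignment = std::max(alignment, reqAlignment);
        return *this;
      }
    };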
+
+ // BaseRAPass - Allocation - Prolog & Epilog
+ // =========================================
+
+ Error BaseRAPass::updateStackFrame() noexcept {
+ // Update the FuncFrame with information gathered during allocation. The only information we don't have at the
+ // moment is the final local stack size, which is calculated last.
+ FuncFrame& frame = func()->frame();
+ for (RegGroup group : RegGroupVirtValues{})
+ frame.addDirtyRegs(group, _clobberedRegs[group]);
+ frame.setLocalStackAlignment(_stackAllocator.alignment());
+
+ // If there are stack arguments that are not assigned to registers upon entry and the function doesn't require
+ // dynamic stack alignment we keep these arguments where they are. This will also mark all stack slots that match
+ // these arguments as allocated.
+ if (_numStackArgsToStackSlots)
+ ASMJIT_PROPAGATE(_markStackArgsToKeep());
+
+ // Calculate offsets of all stack slots and update StackSize to reflect the calculated local stack size.
+ ASMJIT_PROPAGATE(_stackAllocator.calculateStackFrame());
+ frame.setLocalStackSize(_stackAllocator.stackSize());
+
+ // Update the stack frame based on `_argsAssignment` and finalize it. Finalization means applying the final
+ // calculations to the stack layout.
+ ASMJIT_PROPAGATE(_argsAssignment.updateFuncFrame(frame));
+ ASMJIT_PROPAGATE(frame.finalize());
+
+ // StackAllocator allocates all slots starting from [0]; adjust them when necessary.
+ if (frame.localStackOffset() != 0)
+ ASMJIT_PROPAGATE(_stackAllocator.adjustSlotOffsets(int32_t(frame.localStackOffset())));
+
+ // Again, if there are stack arguments allocated in the function's stack we have to handle them. This handles all
+ // cases (either regular or dynamic stack alignment).
+ if (_numStackArgsToStackSlots)
+ ASMJIT_PROPAGATE(_updateStackArgs());
+
+ return kErrorOk;
+ }
+
+ Error BaseRAPass::_markStackArgsToKeep() noexcept {
+ FuncFrame& frame = func()->frame();
+ bool hasSAReg = frame.hasPreservedFP() || !frame.hasDynamicAlignment();
+
+ RAWorkRegs& workRegs = _workRegs;
+ uint32_t numWorkRegs = workRegCount();
+
+ for (uint32_t workId = 0; workId < numWorkRegs; workId++) {
+ RAWorkReg* workReg = workRegs[workId];
+ if (workReg->hasFlag(RAWorkRegFlags::kStackArgToStack)) {
+ ASMJIT_ASSERT(workReg->hasArgIndex());
+ const FuncValue& srcArg = _func->detail().arg(workReg->argIndex());
+
+ // If the register doesn't have a stack slot then we failed. It doesn't make much sense as it was marked as
+ // `kFlagStackArgToStack`, which requires the WorkReg to be live-in upon function entry.
+ RAStackSlot* slot = workReg->stackSlot();
+ if (ASMJIT_UNLIKELY(!slot))
+ return DebugUtils::errored(kErrorInvalidState);
+
+ if (hasSAReg && srcArg.isStack() && !srcArg.isIndirect()) {
+ uint32_t typeSize = TypeUtils::sizeOf(srcArg.typeId());
+ if (typeSize == slot->size()) {
+ slot->addFlags(RAStackSlot::kFlagStackArg);
+ continue;
+ }
+ }
+
+ // NOTE: Update StackOffset here so that when `_argsAssignment.updateFuncFrame()` is called it takes moving
+ // to stack slots into consideration. Without this we may miss some scratch registers later.
+ FuncValue& dstArg = _argsAssignment.arg(workReg->argIndex(), workReg->argValueIndex());
+ dstArg.assignStackOffset(0);
+ }
+ }
+
+ return kErrorOk;
+ }
+
+ Error BaseRAPass::_updateStackArgs() noexcept {
+ FuncFrame& frame = func()->frame();
+ RAWorkRegs& workRegs = _workRegs;
+ uint32_t numWorkRegs = workRegCount();
+
+ for (uint32_t workId = 0; workId < numWorkRegs; workId++) {
+ RAWorkReg* workReg = workRegs[workId];
+ if (workReg->hasFlag(RAWorkRegFlags::kStackArgToStack)) {
+ ASMJIT_ASSERT(workReg->hasArgIndex());
+ RAStackSlot* slot = workReg->stackSlot();
+
+ if (ASMJIT_UNLIKELY(!slot))
+ return DebugUtils::errored(kErrorInvalidState);
+
+ if (slot->isStackArg()) {
+ const FuncValue& srcArg = _func->detail().arg(workReg->argIndex());
+ if (frame.hasPreservedFP()) {
+ slot->setBaseRegId(_fp.id());
+ slot->setOffset(int32_t(frame.saOffsetFromSA()) + srcArg.stackOffset());
+ }
+ else {
+ slot->setOffset(int32_t(frame.saOffsetFromSP()) + srcArg.stackOffset());
+ }
+ }
+ else {
+ FuncValue& dstArg = _argsAssignment.arg(workReg->argIndex(), workReg->argValueIndex());
+ dstArg.setStackOffset(slot->offset());
+ }
+ }
+ }
+
+ return kErrorOk;
+ }
+
+ Error BaseRAPass::insertPrologEpilog() noexcept {
+ FuncFrame& frame = _func->frame();
+
+ cc()->_setCursor(func());
+ ASMJIT_PROPAGATE(cc()->emitProlog(frame));
+ ASMJIT_PROPAGATE(_iEmitHelper->emitArgsAssignment(frame, _argsAssignment));
+
+ cc()->_setCursor(func()->exitNode());
+ ASMJIT_PROPAGATE(cc()->emitEpilog(frame));
+
+ return kErrorOk;
+ }
+
+ // BaseRAPass - Rewriter
+ // =====================
+
+ Error BaseRAPass::rewrite() noexcept {
+ return _rewrite(_func, _stop);
+ }
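None of the functions above are called directly by user code; BaseRAPass (register allocation, prolog/epilog insertion and the final rewrite) runs when a Compiler is finalized. A minimal usage sketch based on asmjit's public Compiler API of roughly this vintage (x86 shown; helper names such as FuncSignatureT changed in later asmjit releases, so treat the exact signatures as an assumption):

    #include <asmjit/x86.h>
    using namespace asmjit;

    typedef int (*SumFunc)(int, int);

    int main() {
      JitRuntime rt;                    // executable memory + relocation
      CodeHolder code;
      code.init(rt.environment());

      x86::Compiler cc(&code);
      FuncNode* func = cc.addFunc(FuncSignatureT<int, int, int>());

      x86::Gp a = cc.newInt32("a");     // virtual registers, assigned by BaseRAPass
      x86::Gp b = cc.newInt32("b");
      func->setArg(0, a);
      func->setArg(1, b);
      cc.add(a, b);
      cc.ret(a);
      cc.endFunc();

      cc.finalize();                    // runs the register allocation pass described above

      SumFunc fn;
      if (rt.add(&fn, &code) != kErrorOk) return 1;
      return fn(20, 22) == 42 ? 0 : 1;
    }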
+
+ // BaseRAPass - Logging
+ // ====================
+
+ #ifndef ASMJIT_NO_LOGGING
+ static void RAPass_formatLiveness(BaseRAPass* pass, String& sb, const RAInst* raInst) noexcept {
+ const RATiedReg* tiedRegs = raInst->tiedRegs();
+ uint32_t tiedCount = raInst->tiedCount();
+
+ for (uint32_t i = 0; i < tiedCount; i++) {
+ const RATiedReg& tiedReg = tiedRegs[i];
+
+ if (i != 0)
+ sb.append(' ');
+
+ sb.appendFormat("%s{", pass->workRegById(tiedReg.workId())->name());
+ sb.append(tiedReg.isReadWrite() ? 'X' :
+ tiedReg.isRead() ? 'R' :
+ tiedReg.isWrite() ? 'W' : '?');
+
+ if (tiedReg.isLeadConsecutive())
+ sb.appendFormat("|Lead[%u]", tiedReg.consecutiveData() + 1u);
+
+ if (tiedReg.hasUseId())
+ sb.appendFormat("|Use=%u", tiedReg.useId());
+ else if (tiedReg.isUse())
+ sb.append("|Use");
+
+ if (tiedReg.isUseConsecutive() && !tiedReg.isLeadConsecutive())
+ sb.appendFormat("+%u", tiedReg.consecutiveData());
+
+ if (tiedReg.hasOutId())
+ sb.appendFormat("|Out=%u", tiedReg.outId());
+ else if (tiedReg.isOut())
+ sb.append("|Out");
+
+ if (tiedReg.isOutConsecutive() && !tiedReg.isLeadConsecutive())
+ sb.appendFormat("+%u", tiedReg.consecutiveData());
+
+ if (tiedReg.isLast())
+ sb.append("|Last");
+
+ if (tiedReg.isKill())
+ sb.append("|Kill");
+
+ sb.append("}");
+ }
+ }
+
+ ASMJIT_FAVOR_SIZE Error BaseRAPass::annotateCode() noexcept {
+ StringTmp<1024> sb;
+
+ for (const RABlock* block : _blocks) {
+ BaseNode* node = block->first();
+ if (!node) continue;
+
+ BaseNode* last = block->last();
+ for (;;) {
+ sb.clear();
+ Formatter::formatNode(sb, _formatOptions, cc(), node);
+
+ if (hasDiagnosticOption(DiagnosticOptions::kRADebugLiveness) && node->isInst() && node->hasPassData()) {
+ const RAInst* raInst = node->passData<RAInst>();
+ if (raInst->tiedCount() > 0) {
+ sb.padEnd(40);
+ sb.append(" | ");
+ RAPass_formatLiveness(this, sb, raInst);
+ }
+ }
+
+ node->setInlineComment(static_cast<char*>(cc()->_dataZone.dup(sb.data(), sb.size(), true)));
+ if (node == last)
+ break;
+ node = node->next();
+ }
+ }
+
+ return kErrorOk;
+ }
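annotateCode and RAPass_formatLiveness only take effect when logging is compiled in and the corresponding diagnostic options are enabled on the emitter. A short sketch of how a user would turn the annotations on, using asmjit's public FileLogger and DiagnosticOptions; kRADebugLiveness is the option checked in the code above, and kRAAnnotate is assumed here to be the option that enables annotateCode():

    #include <asmjit/x86.h>
    #include <cstdio>
    using namespace asmjit;

    int main() {
      JitRuntime rt;
      CodeHolder code;
      code.init(rt.environment());

      FileLogger logger(stdout);        // print formatted nodes (with RA comments) to stdout
      code.setLogger(&logger);

      x86::Compiler cc(&code);
      cc.addDiagnosticOptions(DiagnosticOptions::kRAAnnotate |
                              DiagnosticOptions::kRADebugLiveness);

      cc.addFunc(FuncSignatureT<void>());
      cc.ret();
      cc.endFunc();
      cc.finalize();                    // logged output now carries the liveness annotations
      return 0;
    }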
+
+ ASMJIT_FAVOR_SIZE Error BaseRAPass::_dumpBlockIds(String& sb, const RABlocks& blocks) noexcept {
+ for (uint32_t i = 0, size = blocks.size(); i < size; i++) {
+ const RABlock* block = blocks[i];
+ if (i != 0)
+ ASMJIT_PROPAGATE(sb.appendFormat(", #%u", block->blockId()));
+ else
+ ASMJIT_PROPAGATE(sb.appendFormat("#%u", block->blockId()));
+ }
+ return kErrorOk;
+ }
+
+ ASMJIT_FAVOR_SIZE Error BaseRAPass::_dumpBlockLiveness(String& sb, const RABlock* block) noexcept {
+ for (uint32_t liveType = 0; liveType < RABlock::kLiveCount; liveType++) {
+ const char* bitsName = liveType == RABlock::kLiveIn ? "IN " :
+ liveType == RABlock::kLiveOut ? "OUT " :
+ liveType == RABlock::kLiveGen ? "GEN " : "KILL";
+
+ const ZoneBitVector& bits = block->_liveBits[liveType];
+ uint32_t size = bits.size();
+ ASMJIT_ASSERT(size <= workRegCount());
+
+ uint32_t n = 0;
+ for (uint32_t workId = 0; workId < size; workId++) {
+ if (bits.bitAt(workId)) {
+ RAWorkReg* wReg = workRegById(workId);
+
+ if (!n)
+ sb.appendFormat(" %s [", bitsName);
+ else
+ sb.append(", ");
+
+ sb.append(wReg->name());
+ n++;
+ }
+ }
+
+ if (n)
+ sb.append("]\n");
+ }
+
+ return kErrorOk;
+ }
+
+ ASMJIT_FAVOR_SIZE Error BaseRAPass::_dumpLiveSpans(String& sb) noexcept {
+ uint32_t numWorkRegs = _workRegs.size();
+ uint32_t maxSize = _maxWorkRegNameSize;
+
+ for (uint32_t workId = 0; workId < numWorkRegs; workId++) {
+ RAWorkReg* workReg = _workRegs[workId];
+
+ sb.append(" ");
+
+ size_t oldSize = sb.size();
+ sb.append(workReg->name());
+ sb.padEnd(oldSize + maxSize);
+
+ RALiveStats& stats = workReg->liveStats();
+ sb.appendFormat(" {id:%04u width: %-4u freq: %0.4f priority=%0.4f}",
+ workReg->virtId(),
+ stats.width(),
+ stats.freq(),
+ stats.priority());
+ sb.append(": ");
+
+ LiveRegSpans& liveSpans = workReg->liveSpans();
+ for (uint32_t x = 0; x < liveSpans.size(); x++) {
+ const LiveRegSpan& liveSpan = liveSpans[x];
+ if (x)
+ sb.append(", ");
+ sb.appendFormat("[%u:%u]", liveSpan.a, liveSpan.b);
+ }
+
+ sb.append('\n');
+ }
+
+ return kErrorOk;
+ }
+ #endif
+
+ ASMJIT_END_NAMESPACE
+
+ #endif // !ASMJIT_NO_COMPILER