asmjit 0.2.0 → 0.2.1

Files changed (201)
  1. checksums.yaml +4 -4
  2. data/Gemfile.lock +1 -1
  3. data/asmjit.gemspec +1 -1
  4. data/ext/asmjit/asmjit/.editorconfig +10 -0
  5. data/ext/asmjit/asmjit/.github/FUNDING.yml +1 -0
  6. data/ext/asmjit/asmjit/.github/workflows/build-config.json +47 -0
  7. data/ext/asmjit/asmjit/.github/workflows/build.yml +156 -0
  8. data/ext/asmjit/asmjit/.gitignore +6 -0
  9. data/ext/asmjit/asmjit/CMakeLists.txt +611 -0
  10. data/ext/asmjit/asmjit/LICENSE.md +17 -0
  11. data/ext/asmjit/asmjit/README.md +69 -0
  12. data/ext/asmjit/asmjit/src/asmjit/a64.h +62 -0
  13. data/ext/asmjit/asmjit/src/asmjit/arm/a64archtraits_p.h +81 -0
  14. data/ext/asmjit/asmjit/src/asmjit/arm/a64assembler.cpp +5115 -0
  15. data/ext/asmjit/asmjit/src/asmjit/arm/a64assembler.h +72 -0
  16. data/ext/asmjit/asmjit/src/asmjit/arm/a64builder.cpp +51 -0
  17. data/ext/asmjit/asmjit/src/asmjit/arm/a64builder.h +57 -0
  18. data/ext/asmjit/asmjit/src/asmjit/arm/a64compiler.cpp +60 -0
  19. data/ext/asmjit/asmjit/src/asmjit/arm/a64compiler.h +247 -0
  20. data/ext/asmjit/asmjit/src/asmjit/arm/a64emithelper.cpp +464 -0
  21. data/ext/asmjit/asmjit/src/asmjit/arm/a64emithelper_p.h +50 -0
  22. data/ext/asmjit/asmjit/src/asmjit/arm/a64emitter.h +1228 -0
  23. data/ext/asmjit/asmjit/src/asmjit/arm/a64formatter.cpp +298 -0
  24. data/ext/asmjit/asmjit/src/asmjit/arm/a64formatter_p.h +59 -0
  25. data/ext/asmjit/asmjit/src/asmjit/arm/a64func.cpp +189 -0
  26. data/ext/asmjit/asmjit/src/asmjit/arm/a64func_p.h +33 -0
  27. data/ext/asmjit/asmjit/src/asmjit/arm/a64globals.h +1894 -0
  28. data/ext/asmjit/asmjit/src/asmjit/arm/a64instapi.cpp +278 -0
  29. data/ext/asmjit/asmjit/src/asmjit/arm/a64instapi_p.h +41 -0
  30. data/ext/asmjit/asmjit/src/asmjit/arm/a64instdb.cpp +1957 -0
  31. data/ext/asmjit/asmjit/src/asmjit/arm/a64instdb.h +74 -0
  32. data/ext/asmjit/asmjit/src/asmjit/arm/a64instdb_p.h +876 -0
  33. data/ext/asmjit/asmjit/src/asmjit/arm/a64operand.cpp +85 -0
  34. data/ext/asmjit/asmjit/src/asmjit/arm/a64operand.h +312 -0
  35. data/ext/asmjit/asmjit/src/asmjit/arm/a64rapass.cpp +852 -0
  36. data/ext/asmjit/asmjit/src/asmjit/arm/a64rapass_p.h +105 -0
  37. data/ext/asmjit/asmjit/src/asmjit/arm/a64utils.h +179 -0
  38. data/ext/asmjit/asmjit/src/asmjit/arm/armformatter.cpp +143 -0
  39. data/ext/asmjit/asmjit/src/asmjit/arm/armformatter_p.h +44 -0
  40. data/ext/asmjit/asmjit/src/asmjit/arm/armglobals.h +21 -0
  41. data/ext/asmjit/asmjit/src/asmjit/arm/armoperand.h +621 -0
  42. data/ext/asmjit/asmjit/src/asmjit/arm.h +62 -0
  43. data/ext/asmjit/asmjit/src/asmjit/asmjit-scope-begin.h +17 -0
  44. data/ext/asmjit/asmjit/src/asmjit/asmjit-scope-end.h +9 -0
  45. data/ext/asmjit/asmjit/src/asmjit/asmjit.h +33 -0
  46. data/ext/asmjit/asmjit/src/asmjit/core/api-build_p.h +55 -0
  47. data/ext/asmjit/asmjit/src/asmjit/core/api-config.h +613 -0
  48. data/ext/asmjit/asmjit/src/asmjit/core/archcommons.h +229 -0
  49. data/ext/asmjit/asmjit/src/asmjit/core/archtraits.cpp +160 -0
  50. data/ext/asmjit/asmjit/src/asmjit/core/archtraits.h +290 -0
  51. data/ext/asmjit/asmjit/src/asmjit/core/assembler.cpp +406 -0
  52. data/ext/asmjit/asmjit/src/asmjit/core/assembler.h +129 -0
  53. data/ext/asmjit/asmjit/src/asmjit/core/builder.cpp +889 -0
  54. data/ext/asmjit/asmjit/src/asmjit/core/builder.h +1391 -0
  55. data/ext/asmjit/asmjit/src/asmjit/core/codebuffer.h +113 -0
  56. data/ext/asmjit/asmjit/src/asmjit/core/codeholder.cpp +1149 -0
  57. data/ext/asmjit/asmjit/src/asmjit/core/codeholder.h +1035 -0
  58. data/ext/asmjit/asmjit/src/asmjit/core/codewriter.cpp +175 -0
  59. data/ext/asmjit/asmjit/src/asmjit/core/codewriter_p.h +179 -0
  60. data/ext/asmjit/asmjit/src/asmjit/core/compiler.cpp +582 -0
  61. data/ext/asmjit/asmjit/src/asmjit/core/compiler.h +737 -0
  62. data/ext/asmjit/asmjit/src/asmjit/core/compilerdefs.h +173 -0
  63. data/ext/asmjit/asmjit/src/asmjit/core/constpool.cpp +363 -0
  64. data/ext/asmjit/asmjit/src/asmjit/core/constpool.h +250 -0
  65. data/ext/asmjit/asmjit/src/asmjit/core/cpuinfo.cpp +1162 -0
  66. data/ext/asmjit/asmjit/src/asmjit/core/cpuinfo.h +813 -0
  67. data/ext/asmjit/asmjit/src/asmjit/core/emithelper.cpp +323 -0
  68. data/ext/asmjit/asmjit/src/asmjit/core/emithelper_p.h +58 -0
  69. data/ext/asmjit/asmjit/src/asmjit/core/emitter.cpp +333 -0
  70. data/ext/asmjit/asmjit/src/asmjit/core/emitter.h +741 -0
  71. data/ext/asmjit/asmjit/src/asmjit/core/emitterutils.cpp +129 -0
  72. data/ext/asmjit/asmjit/src/asmjit/core/emitterutils_p.h +89 -0
  73. data/ext/asmjit/asmjit/src/asmjit/core/environment.cpp +46 -0
  74. data/ext/asmjit/asmjit/src/asmjit/core/environment.h +508 -0
  75. data/ext/asmjit/asmjit/src/asmjit/core/errorhandler.cpp +14 -0
  76. data/ext/asmjit/asmjit/src/asmjit/core/errorhandler.h +228 -0
  77. data/ext/asmjit/asmjit/src/asmjit/core/formatter.cpp +584 -0
  78. data/ext/asmjit/asmjit/src/asmjit/core/formatter.h +247 -0
  79. data/ext/asmjit/asmjit/src/asmjit/core/formatter_p.h +34 -0
  80. data/ext/asmjit/asmjit/src/asmjit/core/func.cpp +286 -0
  81. data/ext/asmjit/asmjit/src/asmjit/core/func.h +1445 -0
  82. data/ext/asmjit/asmjit/src/asmjit/core/funcargscontext.cpp +293 -0
  83. data/ext/asmjit/asmjit/src/asmjit/core/funcargscontext_p.h +199 -0
  84. data/ext/asmjit/asmjit/src/asmjit/core/globals.cpp +133 -0
  85. data/ext/asmjit/asmjit/src/asmjit/core/globals.h +393 -0
  86. data/ext/asmjit/asmjit/src/asmjit/core/inst.cpp +113 -0
  87. data/ext/asmjit/asmjit/src/asmjit/core/inst.h +772 -0
  88. data/ext/asmjit/asmjit/src/asmjit/core/jitallocator.cpp +1242 -0
  89. data/ext/asmjit/asmjit/src/asmjit/core/jitallocator.h +261 -0
  90. data/ext/asmjit/asmjit/src/asmjit/core/jitruntime.cpp +80 -0
  91. data/ext/asmjit/asmjit/src/asmjit/core/jitruntime.h +89 -0
  92. data/ext/asmjit/asmjit/src/asmjit/core/logger.cpp +69 -0
  93. data/ext/asmjit/asmjit/src/asmjit/core/logger.h +198 -0
  94. data/ext/asmjit/asmjit/src/asmjit/core/misc_p.h +33 -0
  95. data/ext/asmjit/asmjit/src/asmjit/core/operand.cpp +132 -0
  96. data/ext/asmjit/asmjit/src/asmjit/core/operand.h +1611 -0
  97. data/ext/asmjit/asmjit/src/asmjit/core/osutils.cpp +84 -0
  98. data/ext/asmjit/asmjit/src/asmjit/core/osutils.h +61 -0
  99. data/ext/asmjit/asmjit/src/asmjit/core/osutils_p.h +68 -0
  100. data/ext/asmjit/asmjit/src/asmjit/core/raassignment_p.h +418 -0
  101. data/ext/asmjit/asmjit/src/asmjit/core/rabuilders_p.h +612 -0
  102. data/ext/asmjit/asmjit/src/asmjit/core/radefs_p.h +1204 -0
  103. data/ext/asmjit/asmjit/src/asmjit/core/ralocal.cpp +1166 -0
  104. data/ext/asmjit/asmjit/src/asmjit/core/ralocal_p.h +254 -0
  105. data/ext/asmjit/asmjit/src/asmjit/core/rapass.cpp +1969 -0
  106. data/ext/asmjit/asmjit/src/asmjit/core/rapass_p.h +1183 -0
  107. data/ext/asmjit/asmjit/src/asmjit/core/rastack.cpp +184 -0
  108. data/ext/asmjit/asmjit/src/asmjit/core/rastack_p.h +171 -0
  109. data/ext/asmjit/asmjit/src/asmjit/core/string.cpp +559 -0
  110. data/ext/asmjit/asmjit/src/asmjit/core/string.h +372 -0
  111. data/ext/asmjit/asmjit/src/asmjit/core/support.cpp +494 -0
  112. data/ext/asmjit/asmjit/src/asmjit/core/support.h +1773 -0
  113. data/ext/asmjit/asmjit/src/asmjit/core/target.cpp +14 -0
  114. data/ext/asmjit/asmjit/src/asmjit/core/target.h +53 -0
  115. data/ext/asmjit/asmjit/src/asmjit/core/type.cpp +74 -0
  116. data/ext/asmjit/asmjit/src/asmjit/core/type.h +419 -0
  117. data/ext/asmjit/asmjit/src/asmjit/core/virtmem.cpp +722 -0
  118. data/ext/asmjit/asmjit/src/asmjit/core/virtmem.h +242 -0
  119. data/ext/asmjit/asmjit/src/asmjit/core/zone.cpp +353 -0
  120. data/ext/asmjit/asmjit/src/asmjit/core/zone.h +615 -0
  121. data/ext/asmjit/asmjit/src/asmjit/core/zonehash.cpp +309 -0
  122. data/ext/asmjit/asmjit/src/asmjit/core/zonehash.h +186 -0
  123. data/ext/asmjit/asmjit/src/asmjit/core/zonelist.cpp +163 -0
  124. data/ext/asmjit/asmjit/src/asmjit/core/zonelist.h +209 -0
  125. data/ext/asmjit/asmjit/src/asmjit/core/zonestack.cpp +176 -0
  126. data/ext/asmjit/asmjit/src/asmjit/core/zonestack.h +239 -0
  127. data/ext/asmjit/asmjit/src/asmjit/core/zonestring.h +120 -0
  128. data/ext/asmjit/asmjit/src/asmjit/core/zonetree.cpp +99 -0
  129. data/ext/asmjit/asmjit/src/asmjit/core/zonetree.h +380 -0
  130. data/ext/asmjit/asmjit/src/asmjit/core/zonevector.cpp +356 -0
  131. data/ext/asmjit/asmjit/src/asmjit/core/zonevector.h +690 -0
  132. data/ext/asmjit/asmjit/src/asmjit/core.h +1861 -0
  133. data/ext/asmjit/asmjit/src/asmjit/x86/x86archtraits_p.h +148 -0
  134. data/ext/asmjit/asmjit/src/asmjit/x86/x86assembler.cpp +5110 -0
  135. data/ext/asmjit/asmjit/src/asmjit/x86/x86assembler.h +685 -0
  136. data/ext/asmjit/asmjit/src/asmjit/x86/x86builder.cpp +52 -0
  137. data/ext/asmjit/asmjit/src/asmjit/x86/x86builder.h +351 -0
  138. data/ext/asmjit/asmjit/src/asmjit/x86/x86compiler.cpp +61 -0
  139. data/ext/asmjit/asmjit/src/asmjit/x86/x86compiler.h +721 -0
  140. data/ext/asmjit/asmjit/src/asmjit/x86/x86emithelper.cpp +619 -0
  141. data/ext/asmjit/asmjit/src/asmjit/x86/x86emithelper_p.h +60 -0
  142. data/ext/asmjit/asmjit/src/asmjit/x86/x86emitter.h +4315 -0
  143. data/ext/asmjit/asmjit/src/asmjit/x86/x86formatter.cpp +944 -0
  144. data/ext/asmjit/asmjit/src/asmjit/x86/x86formatter_p.h +58 -0
  145. data/ext/asmjit/asmjit/src/asmjit/x86/x86func.cpp +503 -0
  146. data/ext/asmjit/asmjit/src/asmjit/x86/x86func_p.h +33 -0
  147. data/ext/asmjit/asmjit/src/asmjit/x86/x86globals.h +2169 -0
  148. data/ext/asmjit/asmjit/src/asmjit/x86/x86instapi.cpp +1732 -0
  149. data/ext/asmjit/asmjit/src/asmjit/x86/x86instapi_p.h +41 -0
  150. data/ext/asmjit/asmjit/src/asmjit/x86/x86instdb.cpp +4427 -0
  151. data/ext/asmjit/asmjit/src/asmjit/x86/x86instdb.h +563 -0
  152. data/ext/asmjit/asmjit/src/asmjit/x86/x86instdb_p.h +311 -0
  153. data/ext/asmjit/asmjit/src/asmjit/x86/x86opcode_p.h +436 -0
  154. data/ext/asmjit/asmjit/src/asmjit/x86/x86operand.cpp +231 -0
  155. data/ext/asmjit/asmjit/src/asmjit/x86/x86operand.h +1085 -0
  156. data/ext/asmjit/asmjit/src/asmjit/x86/x86rapass.cpp +1509 -0
  157. data/ext/asmjit/asmjit/src/asmjit/x86/x86rapass_p.h +94 -0
  158. data/ext/asmjit/asmjit/src/asmjit/x86.h +93 -0
  159. data/ext/asmjit/asmjit/src/asmjit.natvis +245 -0
  160. data/ext/asmjit/asmjit/test/asmjit_test_assembler.cpp +84 -0
  161. data/ext/asmjit/asmjit/test/asmjit_test_assembler.h +85 -0
  162. data/ext/asmjit/asmjit/test/asmjit_test_assembler_a64.cpp +4006 -0
  163. data/ext/asmjit/asmjit/test/asmjit_test_assembler_x64.cpp +17833 -0
  164. data/ext/asmjit/asmjit/test/asmjit_test_assembler_x86.cpp +8300 -0
  165. data/ext/asmjit/asmjit/test/asmjit_test_compiler.cpp +253 -0
  166. data/ext/asmjit/asmjit/test/asmjit_test_compiler.h +73 -0
  167. data/ext/asmjit/asmjit/test/asmjit_test_compiler_a64.cpp +690 -0
  168. data/ext/asmjit/asmjit/test/asmjit_test_compiler_x86.cpp +4317 -0
  169. data/ext/asmjit/asmjit/test/asmjit_test_emitters.cpp +197 -0
  170. data/ext/asmjit/asmjit/test/asmjit_test_instinfo.cpp +181 -0
  171. data/ext/asmjit/asmjit/test/asmjit_test_misc.h +257 -0
  172. data/ext/asmjit/asmjit/test/asmjit_test_perf.cpp +62 -0
  173. data/ext/asmjit/asmjit/test/asmjit_test_perf.h +61 -0
  174. data/ext/asmjit/asmjit/test/asmjit_test_perf_a64.cpp +699 -0
  175. data/ext/asmjit/asmjit/test/asmjit_test_perf_x86.cpp +5032 -0
  176. data/ext/asmjit/asmjit/test/asmjit_test_unit.cpp +172 -0
  177. data/ext/asmjit/asmjit/test/asmjit_test_x86_sections.cpp +172 -0
  178. data/ext/asmjit/asmjit/test/asmjitutils.h +38 -0
  179. data/ext/asmjit/asmjit/test/broken.cpp +312 -0
  180. data/ext/asmjit/asmjit/test/broken.h +148 -0
  181. data/ext/asmjit/asmjit/test/cmdline.h +61 -0
  182. data/ext/asmjit/asmjit/test/performancetimer.h +41 -0
  183. data/ext/asmjit/asmjit/tools/configure-makefiles.sh +13 -0
  184. data/ext/asmjit/asmjit/tools/configure-ninja.sh +13 -0
  185. data/ext/asmjit/asmjit/tools/configure-sanitizers.sh +13 -0
  186. data/ext/asmjit/asmjit/tools/configure-vs2019-x64.bat +2 -0
  187. data/ext/asmjit/asmjit/tools/configure-vs2019-x86.bat +2 -0
  188. data/ext/asmjit/asmjit/tools/configure-vs2022-x64.bat +2 -0
  189. data/ext/asmjit/asmjit/tools/configure-vs2022-x86.bat +2 -0
  190. data/ext/asmjit/asmjit/tools/configure-xcode.sh +8 -0
  191. data/ext/asmjit/asmjit/tools/enumgen.js +417 -0
  192. data/ext/asmjit/asmjit/tools/enumgen.sh +3 -0
  193. data/ext/asmjit/asmjit/tools/tablegen-arm.js +365 -0
  194. data/ext/asmjit/asmjit/tools/tablegen-arm.sh +3 -0
  195. data/ext/asmjit/asmjit/tools/tablegen-x86.js +2638 -0
  196. data/ext/asmjit/asmjit/tools/tablegen-x86.sh +3 -0
  197. data/ext/asmjit/asmjit/tools/tablegen.js +947 -0
  198. data/ext/asmjit/asmjit/tools/tablegen.sh +4 -0
  199. data/ext/asmjit/asmjit.cc +18 -0
  200. data/lib/asmjit/version.rb +1 -1
  201. metadata +197 -2
data/ext/asmjit/asmjit/src/asmjit/x86/x86rapass.cpp
@@ -0,0 +1,1509 @@
1
+ // This file is part of AsmJit project <https://asmjit.com>
2
+ //
3
+ // See asmjit.h or LICENSE.md for license and copyright information
4
+ // SPDX-License-Identifier: Zlib
5
+
6
+ #include "../core/api-build_p.h"
7
+ #if !defined(ASMJIT_NO_X86) && !defined(ASMJIT_NO_COMPILER)
8
+
9
+ #include "../core/cpuinfo.h"
10
+ #include "../core/support.h"
11
+ #include "../core/type.h"
12
+ #include "../x86/x86assembler.h"
13
+ #include "../x86/x86compiler.h"
14
+ #include "../x86/x86instapi_p.h"
15
+ #include "../x86/x86instdb_p.h"
16
+ #include "../x86/x86emithelper_p.h"
17
+ #include "../x86/x86rapass_p.h"
18
+
19
+ ASMJIT_BEGIN_SUB_NAMESPACE(x86)
20
+
21
+ // x86::X86RAPass - Utilities
22
+ // ==========================
23
+
24
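+ // Returns a mask covering all bits of an immediate operand of `size` bytes (1, 2, 4, or 8); sizes of 16 bytes and larger map to zero.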
+ static ASMJIT_FORCE_INLINE uint64_t raImmMaskFromSize(uint32_t size) noexcept {
25
+ ASMJIT_ASSERT(size > 0 && size < 256);
26
+ static constexpr uint64_t masks[] = {
27
+ 0x00000000000000FFu, // 1
28
+ 0x000000000000FFFFu, // 2
29
+ 0x00000000FFFFFFFFu, // 4
30
+ 0xFFFFFFFFFFFFFFFFu, // 8
31
+ 0x0000000000000000u, // 16
32
+ 0x0000000000000000u, // 32
33
+ 0x0000000000000000u, // 64
34
+ 0x0000000000000000u, // 128
35
+ 0x0000000000000000u // 256
36
+ };
37
+ return masks[Support::ctz(size)];
38
+ }
39
+
40
+ static const RegMask raConsecutiveLeadCountToRegMaskFilter[5] = {
41
+ 0xFFFFFFFFu, // [0] No consecutive.
42
+ 0x00000000u, // [1] Invalid, never used.
43
+ 0x55555555u, // [2] Even registers.
44
+ 0x00000000u, // [3] Invalid, never used.
45
+ 0x11111111u // [4] Every fourth register.
46
+ };
47
+
48
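+ // Translates operand read/write flags into tied-register USE/OUT flags; the lookup table is indexed by the kRW|kRegMem bits of `rwFlags`.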
+ static ASMJIT_FORCE_INLINE RATiedFlags raUseOutFlagsFromRWFlags(OpRWFlags rwFlags) noexcept {
49
+ static constexpr RATiedFlags map[] = {
50
+ RATiedFlags::kNone,
51
+ RATiedFlags::kRead | RATiedFlags::kUse, // kRead
52
+ RATiedFlags::kWrite | RATiedFlags::kOut, // kWrite
53
+ RATiedFlags::kRW | RATiedFlags::kUse, // kRW
54
+ RATiedFlags::kNone,
55
+ RATiedFlags::kRead | RATiedFlags::kUse | RATiedFlags::kUseRM, // kRead | kRegMem
56
+ RATiedFlags::kWrite | RATiedFlags::kOut | RATiedFlags::kOutRM, // kWrite | kRegMem
57
+ RATiedFlags::kRW | RATiedFlags::kUse | RATiedFlags::kUseRM // kRW | kRegMem
58
+ };
59
+
60
+ return map[uint32_t(rwFlags & (OpRWFlags::kRW | OpRWFlags::kRegMem))];
61
+ }
62
+
63
+ static ASMJIT_FORCE_INLINE RATiedFlags raRegRwFlags(OpRWFlags flags) noexcept {
64
+ return (RATiedFlags)raUseOutFlagsFromRWFlags(flags);
65
+ }
66
+
67
+ static ASMJIT_FORCE_INLINE RATiedFlags raMemBaseRwFlags(OpRWFlags flags) noexcept {
68
+ constexpr uint32_t kShift = Support::ConstCTZ<uint32_t(OpRWFlags::kMemBaseRW)>::value;
69
+ return (RATiedFlags)raUseOutFlagsFromRWFlags(OpRWFlags(uint32_t(flags) >> kShift) & OpRWFlags::kRW);
70
+ }
71
+
72
+ static ASMJIT_FORCE_INLINE RATiedFlags raMemIndexRwFlags(OpRWFlags flags) noexcept {
73
+ constexpr uint32_t kShift = Support::ConstCTZ<uint32_t(OpRWFlags::kMemIndexRW)>::value;
74
+ return (RATiedFlags)raUseOutFlagsFromRWFlags(OpRWFlags(uint32_t(flags) >> kShift) & OpRWFlags::kRW);
75
+ }
76
+
77
+ // x86::RACFGBuilder
78
+ // =================
79
+
80
+ class RACFGBuilder : public RACFGBuilderT<RACFGBuilder> {
81
+ public:
82
+ Arch _arch;
83
+ bool _is64Bit;
84
+ bool _avxEnabled;
85
+
86
+ inline RACFGBuilder(X86RAPass* pass) noexcept
87
+ : RACFGBuilderT<RACFGBuilder>(pass),
88
+ _arch(pass->cc()->arch()),
89
+ _is64Bit(pass->registerSize() == 8),
90
+ _avxEnabled(pass->avxEnabled()) {
91
+ }
92
+
93
+ inline Compiler* cc() const noexcept { return static_cast<Compiler*>(_cc); }
94
+
95
+ inline uint32_t choose(uint32_t sseInst, uint32_t avxInst) const noexcept {
96
+ return _avxEnabled ? avxInst : sseInst;
97
+ }
98
+
99
+ Error onInst(InstNode* inst, InstControlFlow& cf, RAInstBuilder& ib) noexcept;
100
+
101
+ Error onBeforeInvoke(InvokeNode* invokeNode) noexcept;
102
+ Error onInvoke(InvokeNode* invokeNode, RAInstBuilder& ib) noexcept;
103
+
104
+ Error moveVecToPtr(InvokeNode* invokeNode, const FuncValue& arg, const Vec& src, BaseReg* out) noexcept;
105
+ Error moveImmToRegArg(InvokeNode* invokeNode, const FuncValue& arg, const Imm& imm_, BaseReg* out) noexcept;
106
+ Error moveImmToStackArg(InvokeNode* invokeNode, const FuncValue& arg, const Imm& imm_) noexcept;
107
+ Error moveRegToStackArg(InvokeNode* invokeNode, const FuncValue& arg, const BaseReg& reg) noexcept;
108
+
109
+ Error onBeforeRet(FuncRetNode* funcRet) noexcept;
110
+ Error onRet(FuncRetNode* funcRet, RAInstBuilder& ib) noexcept;
111
+ };
112
+
113
+ // x86::RACFGBuilder - OnInst
114
+ // ==========================
115
+
116
+ Error RACFGBuilder::onInst(InstNode* inst, InstControlFlow& cf, RAInstBuilder& ib) noexcept {
117
+ InstRWInfo rwInfo;
118
+
119
+ InstId instId = inst->id();
120
+ if (Inst::isDefinedId(instId)) {
121
+ uint32_t opCount = inst->opCount();
122
+ const Operand* opArray = inst->operands();
123
+ ASMJIT_PROPAGATE(InstInternal::queryRWInfo(_arch, inst->baseInst(), opArray, opCount, &rwInfo));
124
+
125
+ const InstDB::InstInfo& instInfo = InstDB::infoById(instId);
126
+ bool hasGpbHiConstraint = false;
127
+ uint32_t singleRegOps = 0;
128
+
129
+ // Copy instruction RW flags to instruction builder except kMovOp, which is propagated manually later.
130
+ ib.addInstRWFlags(rwInfo.instFlags() & ~InstRWFlags::kMovOp);
131
+
132
+ // Mask of all operand types used by the instruction - can be used as an optimization later.
133
+ uint32_t opTypesMask = 0u;
134
+
135
+ if (opCount) {
136
+ // The mask is for all registers, but we are mostly interested in AVX-512 registers at the moment. The mask
137
+ // will be combined with all available registers of the Compiler at the end so it never uses more registers
138
+ // than available.
139
+ RegMask instructionAllowedRegs = 0xFFFFFFFFu;
140
+
141
+ uint32_t consecutiveOffset = 0;
142
+ uint32_t consecutiveLeadId = Globals::kInvalidId;
143
+ uint32_t consecutiveParent = Globals::kInvalidId;
144
+
145
+ if (instInfo.isEvex()) {
146
+ // EVEX instructions and VEX instructions that can be encoded with EVEX have the possibility to use 32 SIMD
147
+ // registers (XMM/YMM/ZMM).
148
+ if (instInfo.isVex() && !instInfo.isEvexCompatible()) {
149
+ if (instInfo.isEvexKRegOnly()) {
150
+ // EVEX encodable only if the first operand is a K register (compare instructions).
151
+ if (!Reg::isKReg(opArray[0]))
152
+ instructionAllowedRegs = 0xFFFFu;
153
+ }
154
+ else if (instInfo.isEvexTwoOpOnly()) {
155
+ // EVEX encodable only if the instruction has two operands (gather instructions).
156
+ if (opCount != 2)
157
+ instructionAllowedRegs = 0xFFFFu;
158
+ }
159
+ else {
160
+ instructionAllowedRegs = 0xFFFFu;
161
+ }
162
+ }
163
+ }
164
+ else if (instInfo.isEvexTransformable()) {
165
+ ib.addAggregatedFlags(RATiedFlags::kInst_IsTransformable);
166
+ }
167
+ else {
168
+ // Not EVEX, restrict everything to [0-15] registers.
169
+ instructionAllowedRegs = 0xFFFFu;
170
+ }
171
+
172
+ for (uint32_t i = 0; i < opCount; i++) {
173
+ const Operand& op = opArray[i];
174
+ const OpRWInfo& opRwInfo = rwInfo.operand(i);
175
+
176
+ opTypesMask |= 1u << uint32_t(op.opType());
177
+
178
+ if (op.isReg()) {
179
+ // Register Operand
180
+ // ----------------
181
+ const Reg& reg = op.as<Reg>();
182
+
183
+ RATiedFlags flags = raRegRwFlags(opRwInfo.opFlags());
184
+ RegMask allowedRegs = instructionAllowedRegs;
185
+
186
+ // X86-specific constraints related to LO|HI general purpose registers. This is only required when the
187
+ // register is part of the encoding. If the register is fixed we won't restrict anything as it doesn't
188
+ // restrict encoding of other registers.
189
+ if (reg.isGpb() && !opRwInfo.hasOpFlag(OpRWFlags::kRegPhysId)) {
190
+ flags |= RATiedFlags::kX86_Gpb;
191
+ if (!_is64Bit) {
192
+ // Restrict to first four - AL|AH|BL|BH|CL|CH|DL|DH. In 32-bit mode it's not possible to access
193
+ // SIL|DIL, etc, so this is just enough.
194
+ allowedRegs = 0x0Fu;
195
+ }
196
+ else {
197
+ // If we encounter a GPB-HI register the situation is much more complicated than in 32-bit mode.
198
+ // We need to patch all registers to not use ID higher than 7 and all GPB-LO registers to not use
199
+ // index higher than 3. Instead of doing the patching here we just set a flag and will do it later,
200
+ // to not complicate this loop.
201
+ if (reg.isGpbHi()) {
202
+ hasGpbHiConstraint = true;
203
+ allowedRegs = 0x0Fu;
204
+ }
205
+ }
206
+ }
207
+
208
+ uint32_t vIndex = Operand::virtIdToIndex(reg.id());
209
+ if (vIndex < Operand::kVirtIdCount) {
210
+ RAWorkReg* workReg;
211
+ ASMJIT_PROPAGATE(_pass->virtIndexAsWorkReg(vIndex, &workReg));
212
+
213
+ // Use RW instead of Write in case the whole register is not overwritten. This is important
214
+ // for liveness as we cannot kill a register that will be used. For example `mov al, 0xFF` is not
215
+ // a write-only operation if the user allocated the whole `rax` register.
216
+ if ((flags & RATiedFlags::kRW) == RATiedFlags::kWrite) {
217
+ if (workReg->regByteMask() & ~(opRwInfo.writeByteMask() | opRwInfo.extendByteMask())) {
218
+ // Not write-only operation.
219
+ flags = (flags & ~RATiedFlags::kOut) | (RATiedFlags::kRead | RATiedFlags::kUse);
220
+ }
221
+ }
222
+
223
+ // Do not use the RegMem flag if changing Reg to Mem requires an additional CPU feature that may not be enabled.
224
+ if (rwInfo.rmFeature() && Support::test(flags, RATiedFlags::kUseRM | RATiedFlags::kOutRM)) {
225
+ flags &= ~(RATiedFlags::kUseRM | RATiedFlags::kOutRM);
226
+ }
227
+
228
+ RegGroup group = workReg->group();
229
+ RegMask useRegs = _pass->_availableRegs[group] & allowedRegs;
230
+ RegMask outRegs = useRegs;
231
+
232
+ uint32_t useId = BaseReg::kIdBad;
233
+ uint32_t outId = BaseReg::kIdBad;
234
+
235
+ uint32_t useRewriteMask = 0;
236
+ uint32_t outRewriteMask = 0;
237
+
238
+ if (opRwInfo.consecutiveLeadCount()) {
239
+ // There must be a single consecutive register lead, otherwise the RW data is invalid.
240
+ if (consecutiveLeadId != Globals::kInvalidId)
241
+ return DebugUtils::errored(kErrorInvalidState);
242
+
243
+ // A consecutive lead register cannot be used as a consecutive +1/+2/+3 register; the registers must be distinct.
244
+ if (RATiedReg::consecutiveDataFromFlags(flags) != 0)
245
+ return DebugUtils::errored(kErrorNotConsecutiveRegs);
246
+
247
+ flags |= RATiedFlags::kLeadConsecutive | RATiedReg::consecutiveDataToFlags(opRwInfo.consecutiveLeadCount() - 1);
248
+ consecutiveLeadId = workReg->workId();
249
+
250
+ RegMask filter = raConsecutiveLeadCountToRegMaskFilter[opRwInfo.consecutiveLeadCount()];
251
+ if (Support::test(flags, RATiedFlags::kUse)) {
252
+ flags |= RATiedFlags::kUseConsecutive;
253
+ useRegs &= filter;
254
+ }
255
+ else {
256
+ flags |= RATiedFlags::kOutConsecutive;
257
+ outRegs &= filter;
258
+ }
259
+ }
260
+
261
+ if (Support::test(flags, RATiedFlags::kUse)) {
262
+ useRewriteMask = Support::bitMask(inst->getRewriteIndex(&reg._baseId));
263
+ if (opRwInfo.hasOpFlag(OpRWFlags::kRegPhysId)) {
264
+ useId = opRwInfo.physId();
265
+ flags |= RATiedFlags::kUseFixed;
266
+ }
267
+ else if (opRwInfo.hasOpFlag(OpRWFlags::kConsecutive)) {
268
+ if (consecutiveLeadId == Globals::kInvalidId)
269
+ return DebugUtils::errored(kErrorInvalidState);
270
+
271
+ if (consecutiveLeadId == workReg->workId())
272
+ return DebugUtils::errored(kErrorOverlappedRegs);
273
+
274
+ flags |= RATiedFlags::kUseConsecutive | RATiedReg::consecutiveDataToFlags(++consecutiveOffset);
275
+ }
276
+ }
277
+ else {
278
+ outRewriteMask = Support::bitMask(inst->getRewriteIndex(&reg._baseId));
279
+ if (opRwInfo.hasOpFlag(OpRWFlags::kRegPhysId)) {
280
+ outId = opRwInfo.physId();
281
+ flags |= RATiedFlags::kOutFixed;
282
+ }
283
+ else if (opRwInfo.hasOpFlag(OpRWFlags::kConsecutive)) {
284
+ if (consecutiveLeadId == Globals::kInvalidId)
285
+ return DebugUtils::errored(kErrorInvalidState);
286
+
287
+ if (consecutiveLeadId == workReg->workId())
288
+ return DebugUtils::errored(kErrorOverlappedRegs);
289
+
290
+ flags |= RATiedFlags::kOutConsecutive | RATiedReg::consecutiveDataToFlags(++consecutiveOffset);
291
+ }
292
+ }
293
+
294
+ ASMJIT_PROPAGATE(ib.add(workReg, flags, useRegs, useId, useRewriteMask, outRegs, outId, outRewriteMask, opRwInfo.rmSize(), consecutiveParent));
295
+ if (singleRegOps == i)
296
+ singleRegOps++;
297
+
298
+ if (Support::test(flags, RATiedFlags::kLeadConsecutive | RATiedFlags::kUseConsecutive | RATiedFlags::kOutConsecutive))
299
+ consecutiveParent = workReg->workId();
300
+ }
301
+ }
302
+ else if (op.isMem()) {
303
+ // Memory Operand
304
+ // --------------
305
+ const Mem& mem = op.as<Mem>();
306
+ ib.addForbiddenFlags(RATiedFlags::kUseRM | RATiedFlags::kOutRM);
307
+
308
+ if (mem.isRegHome()) {
309
+ RAWorkReg* workReg;
310
+ ASMJIT_PROPAGATE(_pass->virtIndexAsWorkReg(Operand::virtIdToIndex(mem.baseId()), &workReg));
311
+ _pass->getOrCreateStackSlot(workReg);
312
+ }
313
+ else if (mem.hasBaseReg()) {
314
+ uint32_t vIndex = Operand::virtIdToIndex(mem.baseId());
315
+ if (vIndex < Operand::kVirtIdCount) {
316
+ RAWorkReg* workReg;
317
+ ASMJIT_PROPAGATE(_pass->virtIndexAsWorkReg(vIndex, &workReg));
318
+
319
+ RATiedFlags flags = raMemBaseRwFlags(opRwInfo.opFlags());
320
+ RegGroup group = workReg->group();
321
+ RegMask inOutRegs = _pass->_availableRegs[group];
322
+
323
+ uint32_t useId = BaseReg::kIdBad;
324
+ uint32_t outId = BaseReg::kIdBad;
325
+
326
+ uint32_t useRewriteMask = 0;
327
+ uint32_t outRewriteMask = 0;
328
+
329
+ if (Support::test(flags, RATiedFlags::kUse)) {
330
+ useRewriteMask = Support::bitMask(inst->getRewriteIndex(&mem._baseId));
331
+ if (opRwInfo.hasOpFlag(OpRWFlags::kMemPhysId)) {
332
+ useId = opRwInfo.physId();
333
+ flags |= RATiedFlags::kUseFixed;
334
+ }
335
+ }
336
+ else {
337
+ outRewriteMask = Support::bitMask(inst->getRewriteIndex(&mem._baseId));
338
+ if (opRwInfo.hasOpFlag(OpRWFlags::kMemPhysId)) {
339
+ outId = opRwInfo.physId();
340
+ flags |= RATiedFlags::kOutFixed;
341
+ }
342
+ }
343
+
344
+ ASMJIT_PROPAGATE(ib.add(workReg, flags, inOutRegs, useId, useRewriteMask, inOutRegs, outId, outRewriteMask));
345
+ }
346
+ }
347
+
348
+ if (mem.hasIndexReg()) {
349
+ uint32_t vIndex = Operand::virtIdToIndex(mem.indexId());
350
+ if (vIndex < Operand::kVirtIdCount) {
351
+ RAWorkReg* workReg;
352
+ ASMJIT_PROPAGATE(_pass->virtIndexAsWorkReg(vIndex, &workReg));
353
+
354
+ RATiedFlags flags = raMemIndexRwFlags(opRwInfo.opFlags());
355
+ RegGroup group = workReg->group();
356
+ RegMask inOutRegs = _pass->_availableRegs[group] & instructionAllowedRegs;
357
+
358
+ // Index registers never have a fixed id on X86/x64.
359
+ const uint32_t useId = BaseReg::kIdBad;
360
+ const uint32_t outId = BaseReg::kIdBad;
361
+
362
+ uint32_t useRewriteMask = 0;
363
+ uint32_t outRewriteMask = 0;
364
+
365
+ if (Support::test(flags, RATiedFlags::kUse))
366
+ useRewriteMask = Support::bitMask(inst->getRewriteIndex(&mem._data[Operand::kDataMemIndexId]));
367
+ else
368
+ outRewriteMask = Support::bitMask(inst->getRewriteIndex(&mem._data[Operand::kDataMemIndexId]));
369
+
370
+ ASMJIT_PROPAGATE(ib.add(workReg, RATiedFlags::kUse | RATiedFlags::kRead, inOutRegs, useId, useRewriteMask, inOutRegs, outId, outRewriteMask));
371
+ }
372
+ }
373
+ }
374
+ }
375
+ }
376
+
377
+ // Handle extra operand (either REP {cx|ecx|rcx} or AVX-512 {k} selector).
378
+ if (inst->hasExtraReg()) {
379
+ uint32_t vIndex = Operand::virtIdToIndex(inst->extraReg().id());
380
+ if (vIndex < Operand::kVirtIdCount) {
381
+ RAWorkReg* workReg;
382
+ ASMJIT_PROPAGATE(_pass->virtIndexAsWorkReg(vIndex, &workReg));
383
+
384
+ RegGroup group = workReg->group();
385
+ RegMask inOutRegs = _pass->_availableRegs[group];
386
+ uint32_t rewriteMask = Support::bitMask(inst->getRewriteIndex(&inst->extraReg()._id));
387
+
388
+ if (group == RegGroup::kX86_K) {
389
+ // AVX-512 mask selector {k} register - read-only, allocable to any register except {k0}.
390
+ ASMJIT_PROPAGATE(ib.add(workReg, RATiedFlags::kUse | RATiedFlags::kRead, inOutRegs, BaseReg::kIdBad, rewriteMask, inOutRegs, BaseReg::kIdBad, 0));
391
+ singleRegOps = 0;
392
+ }
393
+ else {
394
+ // REP {cx|ecx|rcx} register - read & write, allocable to {cx|ecx|rcx} only.
395
+ ASMJIT_PROPAGATE(ib.add(workReg, RATiedFlags::kUse | RATiedFlags::kRW, inOutRegs, Gp::kIdCx, rewriteMask, inOutRegs, Gp::kIdBad, 0));
396
+ }
397
+ }
398
+ else {
399
+ RegGroup group = inst->extraReg().group();
400
+ if (group == RegGroup::kX86_K && inst->extraReg().id() != 0)
401
+ singleRegOps = 0;
402
+ }
403
+ }
404
+
405
+ // If this instruction has move semantics then check whether it could be eliminated if all virtual registers
406
+ // are allocated into the same register. Take into account the virtual size of the destination register as that's
407
+ // more important than a physical register size in this case.
408
+ if (rwInfo.hasInstFlag(InstRWFlags::kMovOp) && !inst->hasExtraReg() && Support::bitTest(opTypesMask, uint32_t(OperandType::kReg))) {
409
+ // AVX+ move instructions have a 3-operand form - the first two operands must be the same to guarantee move semantics.
410
+ if (opCount == 2 || (opCount == 3 && opArray[0] == opArray[1])) {
411
+ uint32_t vIndex = Operand::virtIdToIndex(opArray[0].as<Reg>().id());
412
+ if (vIndex < Operand::kVirtIdCount) {
413
+ const VirtReg* vReg = _cc->virtRegByIndex(vIndex);
414
+ const OpRWInfo& opRwInfo = rwInfo.operand(0);
415
+
416
+ uint64_t remainingByteMask = vReg->workReg()->regByteMask() & ~opRwInfo.writeByteMask();
417
+ if (remainingByteMask == 0u || (remainingByteMask & opRwInfo.extendByteMask()) == 0)
418
+ ib.addInstRWFlags(InstRWFlags::kMovOp);
419
+ }
420
+ }
421
+ }
422
+
423
+ // Handle X86 constraints.
424
+ if (hasGpbHiConstraint) {
425
+ for (RATiedReg& tiedReg : ib) {
426
+ RegMask filter = tiedReg.hasFlag(RATiedFlags::kX86_Gpb) ? 0x0Fu : 0xFFu;
427
+ tiedReg._useRegMask &= filter;
428
+ tiedReg._outRegMask &= filter;
429
+ }
430
+ }
431
+
432
+ if (ib.tiedRegCount() == 1) {
433
+ // Handle special cases of some instructions where all operands share the same
434
+ // register. In such a case the single operand becomes read-only or write-only.
435
+ InstSameRegHint sameRegHint = InstSameRegHint::kNone;
436
+ if (singleRegOps == opCount) {
437
+ sameRegHint = instInfo.sameRegHint();
438
+ }
439
+ else if (opCount == 2 && inst->op(1).isImm()) {
440
+ // Handle some tricks used by X86 asm.
441
+ const BaseReg& reg = inst->op(0).as<BaseReg>();
442
+ const Imm& imm = inst->op(1).as<Imm>();
443
+
444
+ const RAWorkReg* workReg = _pass->workRegById(ib[0]->workId());
445
+ uint32_t workRegSize = workReg->signature().size();
446
+
447
+ switch (inst->id()) {
448
+ case Inst::kIdOr: {
449
+ // Sets the value of the destination register to -1, previous content unused.
450
+ if (reg.size() >= 4 || reg.size() >= workRegSize) {
451
+ if (imm.value() == -1 || imm.valueAs<uint64_t>() == raImmMaskFromSize(reg.size()))
452
+ sameRegHint = InstSameRegHint::kWO;
453
+ }
454
+ ASMJIT_FALLTHROUGH;
455
+ }
456
+
457
+ case Inst::kIdAdd:
458
+ case Inst::kIdAnd:
459
+ case Inst::kIdRol:
460
+ case Inst::kIdRor:
461
+ case Inst::kIdSar:
462
+ case Inst::kIdShl:
463
+ case Inst::kIdShr:
464
+ case Inst::kIdSub:
465
+ case Inst::kIdXor: {
466
+ // Updates [E|R]FLAGS without changing the content.
467
+ if (reg.size() != 4 || reg.size() >= workRegSize) {
468
+ if (imm.value() == 0)
469
+ sameRegHint = InstSameRegHint::kRO;
470
+ }
471
+ break;
472
+ }
473
+ }
474
+ }
475
+
476
+ switch (sameRegHint) {
477
+ case InstSameRegHint::kNone:
478
+ break;
479
+ case InstSameRegHint::kRO:
480
+ ib[0]->makeReadOnly();
481
+ break;
482
+ case InstSameRegHint::kWO:
483
+ ib[0]->makeWriteOnly();
484
+ break;
485
+ }
486
+ }
487
+
488
+ cf = instInfo.controlFlow();
489
+ }
490
+
491
+ return kErrorOk;
492
+ }
493
+
494
+ // x86::RACFGBuilder - OnInvoke
495
+ // ============================
496
+
497
+ Error RACFGBuilder::onBeforeInvoke(InvokeNode* invokeNode) noexcept {
498
+ const FuncDetail& fd = invokeNode->detail();
499
+ uint32_t argCount = invokeNode->argCount();
500
+
501
+ cc()->_setCursor(invokeNode->prev());
502
+ RegType nativeRegType = cc()->_gpSignature.regType();
503
+
504
+ for (uint32_t argIndex = 0; argIndex < argCount; argIndex++) {
505
+ const FuncValuePack& argPack = fd.argPack(argIndex);
506
+ for (uint32_t valueIndex = 0; valueIndex < Globals::kMaxValuePack; valueIndex++) {
507
+ if (!argPack[valueIndex])
508
+ break;
509
+
510
+ const FuncValue& arg = argPack[valueIndex];
511
+ const Operand& op = invokeNode->arg(argIndex, valueIndex);
512
+
513
+ if (op.isNone())
514
+ continue;
515
+
516
+ if (op.isReg()) {
517
+ const Reg& reg = op.as<Reg>();
518
+ RAWorkReg* workReg;
519
+ ASMJIT_PROPAGATE(_pass->virtIndexAsWorkReg(Operand::virtIdToIndex(reg.id()), &workReg));
520
+
521
+ if (arg.isReg()) {
522
+ RegGroup regGroup = workReg->group();
523
+ RegGroup argGroup = Reg::groupOf(arg.regType());
524
+
525
+ if (arg.isIndirect()) {
526
+ if (reg.isGp()) {
527
+ if (reg.type() != nativeRegType)
528
+ return DebugUtils::errored(kErrorInvalidAssignment);
529
+ // It's considered allocated if this is an indirect argument and the user used GP.
530
+ continue;
531
+ }
532
+
533
+ BaseReg indirectReg;
534
+ moveVecToPtr(invokeNode, arg, reg.as<Vec>(), &indirectReg);
535
+ invokeNode->_args[argIndex][valueIndex] = indirectReg;
536
+ }
537
+ else {
538
+ if (regGroup != argGroup) {
539
+ // TODO: Conversion is not supported.
540
+ return DebugUtils::errored(kErrorInvalidAssignment);
541
+ }
542
+ }
543
+ }
544
+ else {
545
+ if (arg.isIndirect()) {
546
+ if (reg.isGp()) {
547
+ if (reg.type() != nativeRegType)
548
+ return DebugUtils::errored(kErrorInvalidAssignment);
549
+
550
+ ASMJIT_PROPAGATE(moveRegToStackArg(invokeNode, arg, reg));
551
+ continue;
552
+ }
553
+
554
+ BaseReg indirectReg;
555
+ moveVecToPtr(invokeNode, arg, reg.as<Vec>(), &indirectReg);
556
+ ASMJIT_PROPAGATE(moveRegToStackArg(invokeNode, arg, indirectReg));
557
+ }
558
+ else {
559
+ ASMJIT_PROPAGATE(moveRegToStackArg(invokeNode, arg, reg));
560
+ }
561
+ }
562
+ }
563
+ else if (op.isImm()) {
564
+ if (arg.isReg()) {
565
+ BaseReg reg;
566
+ ASMJIT_PROPAGATE(moveImmToRegArg(invokeNode, arg, op.as<Imm>(), &reg));
567
+ invokeNode->_args[argIndex][valueIndex] = reg;
568
+ }
569
+ else {
570
+ ASMJIT_PROPAGATE(moveImmToStackArg(invokeNode, arg, op.as<Imm>()));
571
+ }
572
+ }
573
+ }
574
+ }
575
+
576
+ cc()->_setCursor(invokeNode);
577
+ if (fd.hasFlag(CallConvFlags::kCalleePopsStack) && fd.argStackSize() != 0)
578
+ ASMJIT_PROPAGATE(cc()->sub(cc()->zsp(), fd.argStackSize()));
579
+
580
+ if (fd.hasRet()) {
581
+ for (uint32_t valueIndex = 0; valueIndex < Globals::kMaxValuePack; valueIndex++) {
582
+ const FuncValue& ret = fd.ret(valueIndex);
583
+ if (!ret)
584
+ break;
585
+
586
+ const Operand& op = invokeNode->ret(valueIndex);
587
+ if (op.isReg()) {
588
+ const Reg& reg = op.as<Reg>();
589
+ RAWorkReg* workReg;
590
+ ASMJIT_PROPAGATE(_pass->virtIndexAsWorkReg(Operand::virtIdToIndex(reg.id()), &workReg));
591
+
592
+ if (ret.isReg()) {
593
+ if (ret.regType() == RegType::kX86_St) {
594
+ if (workReg->group() != RegGroup::kVec)
595
+ return DebugUtils::errored(kErrorInvalidAssignment);
596
+
597
+ Reg dst(workReg->signature(), workReg->virtId());
598
+ Mem mem;
599
+
600
+ TypeId typeId = TypeUtils::scalarOf(workReg->typeId());
601
+ if (ret.hasTypeId())
602
+ typeId = ret.typeId();
603
+
604
+ switch (typeId) {
605
+ case TypeId::kFloat32:
606
+ ASMJIT_PROPAGATE(_pass->useTemporaryMem(mem, 4, 4));
607
+ mem.setSize(4);
608
+ ASMJIT_PROPAGATE(cc()->fstp(mem));
609
+ ASMJIT_PROPAGATE(cc()->emit(choose(Inst::kIdMovss, Inst::kIdVmovss), dst.as<Xmm>(), mem));
610
+ break;
611
+
612
+ case TypeId::kFloat64:
613
+ ASMJIT_PROPAGATE(_pass->useTemporaryMem(mem, 8, 4));
614
+ mem.setSize(8);
615
+ ASMJIT_PROPAGATE(cc()->fstp(mem));
616
+ ASMJIT_PROPAGATE(cc()->emit(choose(Inst::kIdMovsd, Inst::kIdVmovsd), dst.as<Xmm>(), mem));
617
+ break;
618
+
619
+ default:
620
+ return DebugUtils::errored(kErrorInvalidAssignment);
621
+ }
622
+ }
623
+ else {
624
+ RegGroup regGroup = workReg->group();
625
+ RegGroup retGroup = Reg::groupOf(ret.regType());
626
+
627
+ if (regGroup != retGroup) {
628
+ // TODO: Conversion is not supported.
629
+ return DebugUtils::errored(kErrorInvalidAssignment);
630
+ }
631
+ }
632
+ }
633
+ }
634
+ }
635
+ }
636
+
637
+ // This block has function call(s).
638
+ _curBlock->addFlags(RABlockFlags::kHasFuncCalls);
639
+ _pass->func()->frame().addAttributes(FuncAttributes::kHasFuncCalls);
640
+ _pass->func()->frame().updateCallStackSize(fd.argStackSize());
641
+
642
+ return kErrorOk;
643
+ }
644
+
645
+ Error RACFGBuilder::onInvoke(InvokeNode* invokeNode, RAInstBuilder& ib) noexcept {
646
+ uint32_t argCount = invokeNode->argCount();
647
+ const FuncDetail& fd = invokeNode->detail();
648
+
649
+ for (uint32_t argIndex = 0; argIndex < argCount; argIndex++) {
650
+ const FuncValuePack& argPack = fd.argPack(argIndex);
651
+ for (uint32_t valueIndex = 0; valueIndex < Globals::kMaxValuePack; valueIndex++) {
652
+ if (!argPack[valueIndex])
653
+ continue;
654
+
655
+ const FuncValue& arg = argPack[valueIndex];
656
+ const Operand& op = invokeNode->arg(argIndex, valueIndex);
657
+
658
+ if (op.isNone())
659
+ continue;
660
+
661
+ if (op.isReg()) {
662
+ const Reg& reg = op.as<Reg>();
663
+ RAWorkReg* workReg;
664
+ ASMJIT_PROPAGATE(_pass->virtIndexAsWorkReg(Operand::virtIdToIndex(reg.id()), &workReg));
665
+
666
+ if (arg.isIndirect()) {
667
+ RegGroup regGroup = workReg->group();
668
+ if (regGroup != RegGroup::kGp)
669
+ return DebugUtils::errored(kErrorInvalidState);
670
+ ASMJIT_PROPAGATE(ib.addCallArg(workReg, arg.regId()));
671
+ }
672
+ else if (arg.isReg()) {
673
+ RegGroup regGroup = workReg->group();
674
+ RegGroup argGroup = Reg::groupOf(arg.regType());
675
+
676
+ if (regGroup == argGroup) {
677
+ ASMJIT_PROPAGATE(ib.addCallArg(workReg, arg.regId()));
678
+ }
679
+ }
680
+ }
681
+ }
682
+ }
683
+
684
+ for (uint32_t retIndex = 0; retIndex < Globals::kMaxValuePack; retIndex++) {
685
+ const FuncValue& ret = fd.ret(retIndex);
686
+ if (!ret)
687
+ break;
688
+
689
+ // Not handled here...
690
+ const Operand& op = invokeNode->ret(retIndex);
691
+ if (ret.regType() == RegType::kX86_St)
692
+ continue;
693
+
694
+ if (op.isReg()) {
695
+ const Reg& reg = op.as<Reg>();
696
+ RAWorkReg* workReg;
697
+ ASMJIT_PROPAGATE(_pass->virtIndexAsWorkReg(Operand::virtIdToIndex(reg.id()), &workReg));
698
+
699
+ if (ret.isReg()) {
700
+ RegGroup regGroup = workReg->group();
701
+ RegGroup retGroup = Reg::groupOf(ret.regType());
702
+
703
+ if (regGroup == retGroup) {
704
+ ASMJIT_PROPAGATE(ib.addCallRet(workReg, ret.regId()));
705
+ }
706
+ }
707
+ else {
708
+ return DebugUtils::errored(kErrorInvalidAssignment);
709
+ }
710
+ }
711
+ }
712
+
713
+ // Setup clobbered registers.
714
+ for (RegGroup group : RegGroupVirtValues{})
715
+ ib._clobbered[group] = Support::lsbMask<RegMask>(_pass->_physRegCount[group]) & ~fd.preservedRegs(group);
716
+
717
+ return kErrorOk;
718
+ }
719
+
720
+ // x86::RACFGBuilder - MoveVecToPtr
721
+ // ================================
722
+
723
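+ // Maps a size in bytes to a vector register signature: ZMM for 64 and larger, YMM for 32 and larger, XMM otherwise.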
+ static inline OperandSignature x86VecRegSignatureBySize(uint32_t size) noexcept {
724
+ return OperandSignature{size >= 64 ? uint32_t(Zmm::kSignature) :
725
+ size >= 32 ? uint32_t(Ymm::kSignature) : uint32_t(Xmm::kSignature)};
726
+ }
727
+
728
+ Error RACFGBuilder::moveVecToPtr(InvokeNode* invokeNode, const FuncValue& arg, const Vec& src, BaseReg* out) noexcept {
729
+ DebugUtils::unused(invokeNode);
730
+ ASMJIT_ASSERT(arg.isReg());
731
+
732
+ uint32_t argSize = TypeUtils::sizeOf(arg.typeId());
733
+ if (argSize == 0)
734
+ return DebugUtils::errored(kErrorInvalidState);
735
+
736
+ if (argSize < 16)
737
+ argSize = 16;
738
+
739
+ uint32_t argStackOffset = Support::alignUp(invokeNode->detail()._argStackSize, argSize);
740
+ _funcNode->frame().updateCallStackAlignment(argSize);
741
+ invokeNode->detail()._argStackSize = argStackOffset + argSize;
742
+
743
+ Vec vecReg(x86VecRegSignatureBySize(argSize), src.id());
744
+ Mem vecPtr = ptr(_pass->_sp.as<Gp>(), int32_t(argStackOffset));
745
+
746
+ uint32_t vMovInstId = choose(Inst::kIdMovaps, Inst::kIdVmovaps);
747
+ if (argSize > 16)
748
+ vMovInstId = Inst::kIdVmovaps;
749
+
750
+ ASMJIT_PROPAGATE(cc()->_newReg(out, ArchTraits::byArch(cc()->arch()).regTypeToTypeId(cc()->_gpSignature.regType()), nullptr));
751
+
752
+ VirtReg* vReg = cc()->virtRegById(out->id());
753
+ vReg->setWeight(BaseRAPass::kCallArgWeight);
754
+
755
+ ASMJIT_PROPAGATE(cc()->lea(out->as<Gp>(), vecPtr));
756
+ ASMJIT_PROPAGATE(cc()->emit(vMovInstId, ptr(out->as<Gp>()), vecReg));
757
+
758
+ if (arg.isStack()) {
759
+ Mem stackPtr = ptr(_pass->_sp.as<Gp>(), arg.stackOffset());
760
+ ASMJIT_PROPAGATE(cc()->mov(stackPtr, out->as<Gp>()));
761
+ }
762
+
763
+ return kErrorOk;
764
+ }
765
+
766
+ // x86::RACFGBuilder - MoveImmToRegArg
767
+ // ===================================
768
+
769
+ Error RACFGBuilder::moveImmToRegArg(InvokeNode* invokeNode, const FuncValue& arg, const Imm& imm_, BaseReg* out) noexcept {
770
+ DebugUtils::unused(invokeNode);
771
+ ASMJIT_ASSERT(arg.isReg());
772
+
773
+ Imm imm(imm_);
774
+ TypeId rTypeId = TypeId::kUInt32;
775
+
776
+ switch (arg.typeId()) {
777
+ case TypeId::kInt8: imm.signExtend8Bits(); goto MovU32;
778
+ case TypeId::kUInt8: imm.zeroExtend8Bits(); goto MovU32;
779
+ case TypeId::kInt16: imm.signExtend16Bits(); goto MovU32;
780
+ case TypeId::kUInt16: imm.zeroExtend16Bits(); goto MovU32;
781
+
782
+ case TypeId::kInt32:
783
+ case TypeId::kUInt32:
784
+ MovU32:
785
+ imm.zeroExtend32Bits();
786
+ break;
787
+
788
+ case TypeId::kInt64:
789
+ case TypeId::kUInt64:
790
+ // Moving to GPD automatically zero extends in 64-bit mode.
791
+ if (imm.isUInt32()) {
792
+ imm.zeroExtend32Bits();
793
+ break;
794
+ }
795
+
796
+ rTypeId = TypeId::kUInt64;
797
+ break;
798
+
799
+ default:
800
+ return DebugUtils::errored(kErrorInvalidAssignment);
801
+ }
802
+
803
+ ASMJIT_PROPAGATE(cc()->_newReg(out, rTypeId, nullptr));
804
+ cc()->virtRegById(out->id())->setWeight(BaseRAPass::kCallArgWeight);
805
+
806
+ return cc()->mov(out->as<x86::Gp>(), imm);
807
+ }
808
+
809
+ // x86::RACFGBuilder - MoveImmToStackArg
810
+ // =====================================
811
+
812
+ Error RACFGBuilder::moveImmToStackArg(InvokeNode* invokeNode, const FuncValue& arg, const Imm& imm_) noexcept {
813
+ DebugUtils::unused(invokeNode);
814
+ ASMJIT_ASSERT(arg.isStack());
815
+
816
+ Mem stackPtr = ptr(_pass->_sp.as<Gp>(), arg.stackOffset());
817
+ Imm imm[2];
818
+
819
+ stackPtr.setSize(4);
820
+ imm[0] = imm_;
821
+ uint32_t nMovs = 0;
822
+
823
+ // One stack entry has the same size as the native register size. That means that if we want to move a 32-bit
824
+ // integer on the stack in 64-bit mode, we need to extend it to a 64-bit integer first. In 32-bit mode, pushing
825
+ // a 64-bit value onto the stack is done in two steps by pushing the low and high parts separately.
826
+ switch (arg.typeId()) {
827
+ case TypeId::kInt8: imm[0].signExtend8Bits(); goto MovU32;
828
+ case TypeId::kUInt8: imm[0].zeroExtend8Bits(); goto MovU32;
829
+ case TypeId::kInt16: imm[0].signExtend16Bits(); goto MovU32;
830
+ case TypeId::kUInt16: imm[0].zeroExtend16Bits(); goto MovU32;
831
+
832
+ case TypeId::kInt32:
833
+ case TypeId::kUInt32:
834
+ case TypeId::kFloat32:
835
+ MovU32:
836
+ imm[0].zeroExtend32Bits();
837
+ nMovs = 1;
838
+ break;
839
+
840
+ case TypeId::kInt64:
841
+ case TypeId::kUInt64:
842
+ case TypeId::kFloat64:
843
+ case TypeId::kMmx32:
844
+ case TypeId::kMmx64:
845
+ if (_is64Bit && imm[0].isInt32()) {
846
+ stackPtr.setSize(8);
847
+ nMovs = 1;
848
+ break;
849
+ }
850
+
851
+ imm[1].setValue(imm[0].uint32Hi());
852
+ imm[0].zeroExtend32Bits();
853
+ nMovs = 2;
854
+ break;
855
+
856
+ default:
857
+ return DebugUtils::errored(kErrorInvalidAssignment);
858
+ }
859
+
860
+ for (uint32_t i = 0; i < nMovs; i++) {
861
+ ASMJIT_PROPAGATE(cc()->mov(stackPtr, imm[i]));
862
+ stackPtr.addOffsetLo32(int32_t(stackPtr.size()));
863
+ }
864
+
865
+ return kErrorOk;
866
+ }
867
+
868
+ // x86::RACFGBuilder - MoveRegToStackArg
869
+ // =====================================
870
+
871
+ Error RACFGBuilder::moveRegToStackArg(InvokeNode* invokeNode, const FuncValue& arg, const BaseReg& reg) noexcept {
872
+ DebugUtils::unused(invokeNode);
873
+ ASMJIT_ASSERT(arg.isStack());
874
+
875
+ Mem stackPtr = ptr(_pass->_sp.as<Gp>(), arg.stackOffset());
876
+ Reg r0, r1;
877
+
878
+ VirtReg* vr = cc()->virtRegById(reg.id());
879
+ uint32_t registerSize = cc()->registerSize();
880
+ InstId instId = 0;
881
+
882
+ TypeId dstTypeId = arg.typeId();
883
+ TypeId srcTypeId = vr->typeId();
884
+
885
+ switch (dstTypeId) {
886
+ case TypeId::kInt64:
887
+ case TypeId::kUInt64:
888
+ // Extend BYTE->QWORD (GP).
889
+ if (TypeUtils::isGp8(srcTypeId)) {
890
+ r1.setRegT<RegType::kX86_GpbLo>(reg.id());
891
+
892
+ instId = (dstTypeId == TypeId::kInt64 && srcTypeId == TypeId::kInt8) ? Inst::kIdMovsx : Inst::kIdMovzx;
893
+ goto ExtendMovGpXQ;
894
+ }
895
+
896
+ // Extend WORD->QWORD (GP).
897
+ if (TypeUtils::isGp16(srcTypeId)) {
898
+ r1.setRegT<RegType::kX86_Gpw>(reg.id());
899
+
900
+ instId = (dstTypeId == TypeId::kInt64 && srcTypeId == TypeId::kInt16) ? Inst::kIdMovsx : Inst::kIdMovzx;
901
+ goto ExtendMovGpXQ;
902
+ }
903
+
904
+ // Extend DWORD->QWORD (GP).
905
+ if (TypeUtils::isGp32(srcTypeId)) {
906
+ r1.setRegT<RegType::kX86_Gpd>(reg.id());
907
+
908
+ instId = Inst::kIdMovsxd;
909
+ if (dstTypeId == TypeId::kInt64 && srcTypeId == TypeId::kInt32)
910
+ goto ExtendMovGpXQ;
911
+ else
912
+ goto ZeroExtendGpDQ;
913
+ }
914
+
915
+ // Move QWORD (GP).
916
+ if (TypeUtils::isGp64(srcTypeId)) goto MovGpQ;
917
+ if (TypeUtils::isMmx(srcTypeId)) goto MovMmQ;
918
+ if (TypeUtils::isVec(srcTypeId)) goto MovXmmQ;
919
+ break;
920
+
921
+ case TypeId::kInt32:
922
+ case TypeId::kUInt32:
923
+ case TypeId::kInt16:
924
+ case TypeId::kUInt16:
925
+ // DWORD <- WORD (Zero|Sign Extend).
926
+ if (TypeUtils::isGp16(srcTypeId)) {
927
+ bool isDstSigned = dstTypeId == TypeId::kInt16 || dstTypeId == TypeId::kInt32;
928
+ bool isSrcSigned = srcTypeId == TypeId::kInt8 || srcTypeId == TypeId::kInt16;
929
+
930
+ r1.setRegT<RegType::kX86_Gpw>(reg.id());
931
+ instId = isDstSigned && isSrcSigned ? Inst::kIdMovsx : Inst::kIdMovzx;
932
+ goto ExtendMovGpD;
933
+ }
934
+
935
+ // DWORD <- BYTE (Zero|Sign Extend).
936
+ if (TypeUtils::isGp8(srcTypeId)) {
937
+ bool isDstSigned = dstTypeId == TypeId::kInt16 || dstTypeId == TypeId::kInt32;
938
+ bool isSrcSigned = srcTypeId == TypeId::kInt8 || srcTypeId == TypeId::kInt16;
939
+
940
+ r1.setRegT<RegType::kX86_GpbLo>(reg.id());
941
+ instId = isDstSigned && isSrcSigned ? Inst::kIdMovsx : Inst::kIdMovzx;
942
+ goto ExtendMovGpD;
943
+ }
944
+ ASMJIT_FALLTHROUGH;
945
+
946
+ case TypeId::kInt8:
947
+ case TypeId::kUInt8:
948
+ if (TypeUtils::isInt(srcTypeId)) goto MovGpD;
949
+ if (TypeUtils::isMmx(srcTypeId)) goto MovMmD;
950
+ if (TypeUtils::isVec(srcTypeId)) goto MovXmmD;
951
+ break;
952
+
953
+ case TypeId::kMmx32:
954
+ case TypeId::kMmx64:
955
+ // Extend BYTE->QWORD (GP).
956
+ if (TypeUtils::isGp8(srcTypeId)) {
957
+ r1.setRegT<RegType::kX86_GpbLo>(reg.id());
958
+
959
+ instId = Inst::kIdMovzx;
960
+ goto ExtendMovGpXQ;
961
+ }
962
+
963
+ // Extend WORD->QWORD (GP).
964
+ if (TypeUtils::isGp16(srcTypeId)) {
965
+ r1.setRegT<RegType::kX86_Gpw>(reg.id());
966
+
967
+ instId = Inst::kIdMovzx;
968
+ goto ExtendMovGpXQ;
969
+ }
970
+
971
+ if (TypeUtils::isGp32(srcTypeId)) goto ExtendMovGpDQ;
972
+ if (TypeUtils::isGp64(srcTypeId)) goto MovGpQ;
973
+ if (TypeUtils::isMmx(srcTypeId)) goto MovMmQ;
974
+ if (TypeUtils::isVec(srcTypeId)) goto MovXmmQ;
975
+ break;
976
+
977
+ case TypeId::kFloat32:
978
+ case TypeId::kFloat32x1:
979
+ if (TypeUtils::isVec(srcTypeId)) goto MovXmmD;
980
+ break;
981
+
982
+ case TypeId::kFloat64:
983
+ case TypeId::kFloat64x1:
984
+ if (TypeUtils::isVec(srcTypeId)) goto MovXmmQ;
985
+ break;
986
+
987
+ default:
988
+ if (TypeUtils::isVec(dstTypeId) && reg.as<Reg>().isVec()) {
989
+ stackPtr.setSize(TypeUtils::sizeOf(dstTypeId));
990
+ uint32_t vMovInstId = choose(Inst::kIdMovaps, Inst::kIdVmovaps);
991
+
992
+ if (TypeUtils::isVec128(dstTypeId))
993
+ r0.setRegT<RegType::kX86_Xmm>(reg.id());
994
+ else if (TypeUtils::isVec256(dstTypeId))
995
+ r0.setRegT<RegType::kX86_Ymm>(reg.id());
996
+ else if (TypeUtils::isVec512(dstTypeId))
997
+ r0.setRegT<RegType::kX86_Zmm>(reg.id());
998
+ else
999
+ break;
1000
+
1001
+ return cc()->emit(vMovInstId, stackPtr, r0);
1002
+ }
1003
+ break;
1004
+ }
1005
+ return DebugUtils::errored(kErrorInvalidAssignment);
1006
+
1007
+ // Extend+Move Gp.
1008
+ ExtendMovGpD:
1009
+ stackPtr.setSize(4);
1010
+ r0.setRegT<RegType::kX86_Gpd>(reg.id());
1011
+
1012
+ ASMJIT_PROPAGATE(cc()->emit(instId, r0, r1));
1013
+ ASMJIT_PROPAGATE(cc()->emit(Inst::kIdMov, stackPtr, r0));
1014
+ return kErrorOk;
1015
+
1016
+ ExtendMovGpXQ:
1017
+ if (registerSize == 8) {
1018
+ stackPtr.setSize(8);
1019
+ r0.setRegT<RegType::kX86_Gpq>(reg.id());
1020
+
1021
+ ASMJIT_PROPAGATE(cc()->emit(instId, r0, r1));
1022
+ ASMJIT_PROPAGATE(cc()->emit(Inst::kIdMov, stackPtr, r0));
1023
+ }
1024
+ else {
1025
+ stackPtr.setSize(4);
1026
+ r0.setRegT<RegType::kX86_Gpd>(reg.id());
1027
+
1028
+ ASMJIT_PROPAGATE(cc()->emit(instId, r0, r1));
1029
+
1030
+ ExtendMovGpDQ:
1031
+ ASMJIT_PROPAGATE(cc()->emit(Inst::kIdMov, stackPtr, r0));
1032
+ stackPtr.addOffsetLo32(4);
1033
+ ASMJIT_PROPAGATE(cc()->emit(Inst::kIdAnd, stackPtr, 0));
1034
+ }
1035
+ return kErrorOk;
1036
+
1037
+ ZeroExtendGpDQ:
1038
+ stackPtr.setSize(4);
1039
+ r0.setRegT<RegType::kX86_Gpd>(reg.id());
1040
+ goto ExtendMovGpDQ;
1041
+
1042
+ MovGpD:
1043
+ stackPtr.setSize(4);
1044
+ r0.setRegT<RegType::kX86_Gpd>(reg.id());
1045
+ return cc()->emit(Inst::kIdMov, stackPtr, r0);
1046
+
1047
+ MovGpQ:
1048
+ stackPtr.setSize(8);
1049
+ r0.setRegT<RegType::kX86_Gpq>(reg.id());
1050
+ return cc()->emit(Inst::kIdMov, stackPtr, r0);
1051
+
1052
+ MovMmD:
1053
+ stackPtr.setSize(4);
1054
+ r0.setRegT<RegType::kX86_Mm>(reg.id());
1055
+ return cc()->emit(choose(Inst::kIdMovd, Inst::kIdVmovd), stackPtr, r0);
1056
+
1057
+ MovMmQ:
1058
+ stackPtr.setSize(8);
1059
+ r0.setRegT<RegType::kX86_Mm>(reg.id());
1060
+ return cc()->emit(choose(Inst::kIdMovq, Inst::kIdVmovq), stackPtr, r0);
1061
+
1062
+ MovXmmD:
1063
+ stackPtr.setSize(4);
1064
+ r0.setRegT<RegType::kX86_Xmm>(reg.id());
1065
+ return cc()->emit(choose(Inst::kIdMovss, Inst::kIdVmovss), stackPtr, r0);
1066
+
1067
+ MovXmmQ:
1068
+ stackPtr.setSize(8);
1069
+ r0.setRegT<RegType::kX86_Xmm>(reg.id());
1070
+ return cc()->emit(choose(Inst::kIdMovlps, Inst::kIdVmovlps), stackPtr, r0);
1071
+ }
1072
+
1073
+ // x86::RACFGBuilder - OnRet
1074
+ // =========================
1075
+
1076
+ Error RACFGBuilder::onBeforeRet(FuncRetNode* funcRet) noexcept {
1077
+ const FuncDetail& funcDetail = _pass->func()->detail();
1078
+ const Operand* opArray = funcRet->operands();
1079
+ uint32_t opCount = funcRet->opCount();
1080
+
1081
+ cc()->_setCursor(funcRet->prev());
1082
+
1083
+ for (uint32_t i = 0; i < opCount; i++) {
1084
+ const Operand& op = opArray[i];
1085
+ const FuncValue& ret = funcDetail.ret(i);
1086
+
1087
+ if (!op.isReg())
1088
+ continue;
1089
+
1090
+ if (ret.regType() == RegType::kX86_St) {
1091
+ const Reg& reg = op.as<Reg>();
1092
+ uint32_t vIndex = Operand::virtIdToIndex(reg.id());
1093
+
1094
+ if (vIndex < Operand::kVirtIdCount) {
1095
+ RAWorkReg* workReg;
1096
+ ASMJIT_PROPAGATE(_pass->virtIndexAsWorkReg(vIndex, &workReg));
1097
+
1098
+ if (workReg->group() != RegGroup::kVec)
1099
+ return DebugUtils::errored(kErrorInvalidAssignment);
1100
+
1101
+ Reg src(workReg->signature(), workReg->virtId());
1102
+ Mem mem;
1103
+
1104
+ TypeId typeId = TypeUtils::scalarOf(workReg->typeId());
1105
+ if (ret.hasTypeId())
1106
+ typeId = ret.typeId();
1107
+
1108
+ switch (typeId) {
1109
+ case TypeId::kFloat32:
1110
+ ASMJIT_PROPAGATE(_pass->useTemporaryMem(mem, 4, 4));
1111
+ mem.setSize(4);
1112
+ ASMJIT_PROPAGATE(cc()->emit(choose(Inst::kIdMovss, Inst::kIdVmovss), mem, src.as<Xmm>()));
1113
+ ASMJIT_PROPAGATE(cc()->fld(mem));
1114
+ break;
1115
+
1116
+ case TypeId::kFloat64:
1117
+ ASMJIT_PROPAGATE(_pass->useTemporaryMem(mem, 8, 4));
1118
+ mem.setSize(8);
1119
+ ASMJIT_PROPAGATE(cc()->emit(choose(Inst::kIdMovsd, Inst::kIdVmovsd), mem, src.as<Xmm>()));
1120
+ ASMJIT_PROPAGATE(cc()->fld(mem));
1121
+ break;
1122
+
1123
+ default:
1124
+ return DebugUtils::errored(kErrorInvalidAssignment);
1125
+ }
1126
+ }
1127
+ }
1128
+ }
1129
+
1130
+ return kErrorOk;
1131
+ }
1132
+
1133
+ Error RACFGBuilder::onRet(FuncRetNode* funcRet, RAInstBuilder& ib) noexcept {
1134
+ const FuncDetail& funcDetail = _pass->func()->detail();
1135
+ const Operand* opArray = funcRet->operands();
1136
+ uint32_t opCount = funcRet->opCount();
1137
+
1138
+ for (uint32_t i = 0; i < opCount; i++) {
1139
+ const Operand& op = opArray[i];
1140
+ if (op.isNone()) continue;
1141
+
1142
+ const FuncValue& ret = funcDetail.ret(i);
1143
+ if (ASMJIT_UNLIKELY(!ret.isReg()))
1144
+ return DebugUtils::errored(kErrorInvalidAssignment);
1145
+
1146
+ // Not handled here...
1147
+ if (ret.regType() == RegType::kX86_St)
1148
+ continue;
1149
+
1150
+ if (op.isReg()) {
1151
+ // Register return value.
1152
+ const Reg& reg = op.as<Reg>();
1153
+ uint32_t vIndex = Operand::virtIdToIndex(reg.id());
1154
+
1155
+ if (vIndex < Operand::kVirtIdCount) {
1156
+ RAWorkReg* workReg;
1157
+ ASMJIT_PROPAGATE(_pass->virtIndexAsWorkReg(vIndex, &workReg));
1158
+
1159
+ RegGroup group = workReg->group();
1160
+ RegMask inOutRegs = _pass->_availableRegs[group];
1161
+ ASMJIT_PROPAGATE(ib.add(workReg, RATiedFlags::kUse | RATiedFlags::kRead, inOutRegs, ret.regId(), 0, inOutRegs, BaseReg::kIdBad, 0));
1162
+ }
1163
+ }
1164
+ else {
1165
+ return DebugUtils::errored(kErrorInvalidAssignment);
1166
+ }
1167
+ }
1168
+
1169
+ return kErrorOk;
1170
+ }
+
+ // x86::X86RAPass - Construction & Destruction
+ // ===========================================
+
+ X86RAPass::X86RAPass() noexcept
+ : BaseRAPass() { _iEmitHelper = &_emitHelper; }
+ X86RAPass::~X86RAPass() noexcept {}
+
+ // x86::X86RAPass - OnInit & OnDone
+ // ================================
+
+ void X86RAPass::onInit() noexcept {
+ Arch arch = cc()->arch();
+ uint32_t baseRegCount = Environment::is32Bit(arch) ? 8u : 16u;
+ uint32_t simdRegCount = baseRegCount;
+
+ if (Environment::is64Bit(arch) && _func->frame().isAvx512Enabled())
+ simdRegCount = 32u;
+
+ bool avxEnabled = _func->frame().isAvxEnabled();
+ bool avx512Enabled = _func->frame().isAvx512Enabled();
+
+ _emitHelper._emitter = _cb;
+ _emitHelper._avxEnabled = avxEnabled || avx512Enabled;
+ _emitHelper._avx512Enabled = avx512Enabled;
+
+ _archTraits = &ArchTraits::byArch(arch);
+ _physRegCount.set(RegGroup::kGp, baseRegCount);
+ _physRegCount.set(RegGroup::kVec, simdRegCount);
+ _physRegCount.set(RegGroup::kX86_K, 8);
+ _physRegCount.set(RegGroup::kX86_MM, 8);
+ _buildPhysIndex();
+
+ _availableRegCount = _physRegCount;
+ _availableRegs[RegGroup::kGp] = Support::lsbMask<RegMask>(_physRegCount.get(RegGroup::kGp));
+ _availableRegs[RegGroup::kVec] = Support::lsbMask<RegMask>(_physRegCount.get(RegGroup::kVec));
+ _availableRegs[RegGroup::kX86_K] = Support::lsbMask<RegMask>(_physRegCount.get(RegGroup::kX86_K)) ^ 1u;
+ _availableRegs[RegGroup::kX86_MM] = Support::lsbMask<RegMask>(_physRegCount.get(RegGroup::kX86_MM));
+
+ _scratchRegIndexes[0] = uint8_t(Gp::kIdCx);
+ _scratchRegIndexes[1] = uint8_t(baseRegCount - 1);
+
+ // The architecture-specific setup implicitly makes all registers available, so
+ // mark as unavailable all special registers that cannot be used in general.
+ bool hasFP = _func->frame().hasPreservedFP();
+
+ makeUnavailable(RegGroup::kGp, Gp::kIdSp); // ESP|RSP used as a stack-pointer (SP).
+ if (hasFP) makeUnavailable(RegGroup::kGp, Gp::kIdBp); // EBP|RBP used as a frame-pointer (FP).
+
+ _sp = cc()->zsp();
+ _fp = cc()->zbp();
+ }
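The available-register masks computed above are plain bit masks: each group gets its lowest N bits set, and bit 0 of the K group is cleared because k0 cannot be used as a write mask. A minimal sketch of that arithmetic, using a local stand-in for Support::lsbMask rather than the library's own helper:

    #include <cassert>
    #include <cstdint>

    // Stand-in for Support::lsbMask<T>(n): a mask of the n lowest bits.
    static constexpr uint32_t lsbMask(uint32_t n) {
      return n ? (0xFFFFFFFFu >> (32u - n)) : 0u;
    }

    int main() {
      assert(lsbMask(16) == 0xFFFFu);      // 16 GP registers in 64-bit mode
      assert((lsbMask(8) ^ 1u) == 0xFEu);  // 8 K registers, with k0 excluded
      return 0;
    }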
+
+ void X86RAPass::onDone() noexcept {}
+
+ // x86::X86RAPass - BuildCFG
+ // =========================
+
+ Error X86RAPass::buildCFG() noexcept {
+ return RACFGBuilder(this).run();
+ }
+
+ // x86::X86RAPass - Rewrite
+ // ========================
+
+ static InstId transformVexToEvex(InstId instId) {
+ switch (instId) {
+ case Inst::kIdVbroadcastf128: return Inst::kIdVbroadcastf32x4;
+ case Inst::kIdVbroadcasti128: return Inst::kIdVbroadcasti32x4;
+ case Inst::kIdVextractf128: return Inst::kIdVextractf32x4;
+ case Inst::kIdVextracti128: return Inst::kIdVextracti32x4;
+ case Inst::kIdVinsertf128: return Inst::kIdVinsertf32x4;
+ case Inst::kIdVinserti128: return Inst::kIdVinserti32x4;
+ case Inst::kIdVmovdqa: return Inst::kIdVmovdqa32;
+ case Inst::kIdVmovdqu: return Inst::kIdVmovdqu32;
+ case Inst::kIdVpand: return Inst::kIdVpandd;
+ case Inst::kIdVpandn: return Inst::kIdVpandnd;
+ case Inst::kIdVpor: return Inst::kIdVpord;
+ case Inst::kIdVpxor: return Inst::kIdVpxord;
+ case Inst::kIdVroundpd: return Inst::kIdVrndscalepd;
+ case Inst::kIdVroundps: return Inst::kIdVrndscaleps;
+ case Inst::kIdVroundsd: return Inst::kIdVrndscalesd;
+ case Inst::kIdVroundss: return Inst::kIdVrndscaless;
+
+ default:
+ // This should never happen as only transformable instructions should take this path.
+ ASMJIT_ASSERT(false);
+ return 0;
+ }
+ }
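The table exists because VEX encodings can only address vector registers 0-15; once the allocator assigns a register id above 15, the instruction must use its EVEX form instead. An illustrative sketch using the public x86::Assembler emitters (the pass itself rewrites InstNode ids in place rather than emitting new instructions):

    #include <asmjit/x86.h>
    using namespace asmjit;

    // Illustrative only: the same logical move, first in a VEX-encodable form
    // (registers 0..15), then in the EVEX form required for xmm16..xmm31 (AVX-512).
    static Error emitBoth(x86::Assembler& a) {
      ASMJIT_PROPAGATE(a.vmovdqa(x86::xmm1, x86::xmm2));      // VEX: vmovdqa
      ASMJIT_PROPAGATE(a.vmovdqa32(x86::xmm17, x86::xmm18));  // EVEX: vmovdqa32
      return kErrorOk;
    }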
+
+ ASMJIT_FAVOR_SPEED Error X86RAPass::_rewrite(BaseNode* first, BaseNode* stop) noexcept {
+ uint32_t virtCount = cc()->_vRegArray.size();
+
+ BaseNode* node = first;
+ while (node != stop) {
+ BaseNode* next = node->next();
+ if (node->isInst()) {
+ InstNode* inst = node->as<InstNode>();
+ RAInst* raInst = node->passData<RAInst>();
+
+ Operand* operands = inst->operands();
+ uint32_t opCount = inst->opCount();
+ uint32_t maxRegId = 0;
+
+ uint32_t i;
+
+ // Rewrite virtual registers into physical registers.
+ if (raInst) {
+ // This data is allocated by the Zone passed to `runOnFunction()`, which will be reset after the RA pass finishes.
+ // So reset this data to prevent having a dead pointer after the RA pass is complete.
+ node->resetPassData();
+
+ // If the instruction contains pass data (raInst) then it was subject to register allocation and must be
+ // rewritten to use physical regs.
+ RATiedReg* tiedRegs = raInst->tiedRegs();
+ uint32_t tiedCount = raInst->tiedCount();
+
+ for (i = 0; i < tiedCount; i++) {
+ RATiedReg* tiedReg = &tiedRegs[i];
+
+ Support::BitWordIterator<uint32_t> useIt(tiedReg->useRewriteMask());
+ uint32_t useId = tiedReg->useId();
+ while (useIt.hasNext()) {
+ maxRegId = Support::max(maxRegId, useId);
+ inst->rewriteIdAtIndex(useIt.next(), useId);
+ }
+
+ Support::BitWordIterator<uint32_t> outIt(tiedReg->outRewriteMask());
+ uint32_t outId = tiedReg->outId();
+ while (outIt.hasNext()) {
+ maxRegId = Support::max(maxRegId, outId);
+ inst->rewriteIdAtIndex(outIt.next(), outId);
+ }
+ }
+
+ // Transform a VEX instruction to EVEX when necessary.
+ if (raInst->isTransformable()) {
+ if (maxRegId > 15) {
+ inst->setId(transformVexToEvex(inst->id()));
+ }
+ }
+
+ // Remove moves that do not do anything.
+ //
+ // Usually these moves are inserted during code generation and originally they used different registers. If RA
+ // allocates them to the same register, such a redundant mov appears.
+ if (raInst->hasInstRWFlag(InstRWFlags::kMovOp) && !inst->hasExtraReg()) {
+ if (inst->opCount() == 2) {
+ if (inst->op(0) == inst->op(1)) {
+ cc()->removeNode(node);
+ goto Next;
+ }
+ }
+ }
+
+ if (ASMJIT_UNLIKELY(node->type() != NodeType::kInst)) {
+ // FuncRet terminates the flow; it must either be removed if the exit label is next to it (optimization) or
+ // patched to an architecture-dependent jump instruction that jumps to the function's exit before the epilog.
+ if (node->type() == NodeType::kFuncRet) {
+ RABlock* block = raInst->block();
+ if (!isNextTo(node, _func->exitNode())) {
+ cc()->_setCursor(node->prev());
+ ASMJIT_PROPAGATE(emitJump(_func->exitNode()->label()));
+ }
+
+ BaseNode* prev = node->prev();
+ cc()->removeNode(node);
+ block->setLast(prev);
+ }
+ }
+ }
+
+ // Rewrite stack slot addresses.
+ for (i = 0; i < opCount; i++) {
+ Operand& op = operands[i];
+ if (op.isMem()) {
+ BaseMem& mem = op.as<BaseMem>();
+ if (mem.isRegHome()) {
+ uint32_t virtIndex = Operand::virtIdToIndex(mem.baseId());
+ if (ASMJIT_UNLIKELY(virtIndex >= virtCount))
+ return DebugUtils::errored(kErrorInvalidVirtId);
+
+ VirtReg* virtReg = cc()->virtRegByIndex(virtIndex);
+ RAWorkReg* workReg = virtReg->workReg();
+ ASMJIT_ASSERT(workReg != nullptr);
+
+ RAStackSlot* slot = workReg->stackSlot();
+ int32_t offset = slot->offset();
+
+ mem._setBase(_sp.type(), slot->baseRegId());
+ mem.clearRegHome();
+ mem.addOffsetLo32(offset);
+ }
+ }
+ }
+ }
+
+ Next:
+ node = next;
+ }
+
+ return kErrorOk;
+ }
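The redundant-move elimination in _rewrite() keys off InstRWFlags::kMovOp, which covers the whole family of register-to-register moves; the heart of the check is plain operand equality after the ids have been rewritten. A narrowed, illustrative predicate for a plain `mov` (a sketch, not the pass's exact condition):

    #include <asmjit/x86.h>
    using namespace asmjit;

    // Sketch: once virtual ids are rewritten to physical ids, a two-operand `mov`
    // whose operands compare equal has no effect and can be removed.
    static bool isRedundantMov(const InstNode* inst) {
      return inst->id() == x86::Inst::kIdMov &&
             inst->opCount() == 2 &&
             inst->op(0) == inst->op(1);
    }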
+
+ // x86::X86RAPass - OnEmit
+ // =======================
+
+ Error X86RAPass::emitMove(uint32_t workId, uint32_t dstPhysId, uint32_t srcPhysId) noexcept {
+ RAWorkReg* wReg = workRegById(workId);
+ BaseReg dst(wReg->signature(), dstPhysId);
+ BaseReg src(wReg->signature(), srcPhysId);
+
+ const char* comment = nullptr;
+
+ #ifndef ASMJIT_NO_LOGGING
+ if (hasDiagnosticOption(DiagnosticOptions::kRAAnnotate)) {
+ _tmpString.assignFormat("<MOVE> %s", workRegById(workId)->name());
+ comment = _tmpString.data();
+ }
+ #endif
+
+ return _emitHelper.emitRegMove(dst, src, wReg->typeId(), comment);
+ }
+
+ Error X86RAPass::emitSwap(uint32_t aWorkId, uint32_t aPhysId, uint32_t bWorkId, uint32_t bPhysId) noexcept {
+ RAWorkReg* waReg = workRegById(aWorkId);
+ RAWorkReg* wbReg = workRegById(bWorkId);
+
+ bool is64Bit = Support::max(waReg->typeId(), wbReg->typeId()) >= TypeId::kInt64;
+ OperandSignature sign = is64Bit ? OperandSignature{RegTraits<RegType::kX86_Gpq>::kSignature}
+ : OperandSignature{RegTraits<RegType::kX86_Gpd>::kSignature};
+
+ #ifndef ASMJIT_NO_LOGGING
+ if (hasDiagnosticOption(DiagnosticOptions::kRAAnnotate)) {
+ _tmpString.assignFormat("<SWAP> %s, %s", waReg->name(), wbReg->name());
+ cc()->setInlineComment(_tmpString.data());
+ }
+ #endif
+
+ return cc()->emit(Inst::kIdXchg, Reg(sign, aPhysId), Reg(sign, bPhysId));
+ }
+
+ Error X86RAPass::emitLoad(uint32_t workId, uint32_t dstPhysId) noexcept {
+ RAWorkReg* wReg = workRegById(workId);
+ BaseReg dstReg(wReg->signature(), dstPhysId);
+ BaseMem srcMem(workRegAsMem(wReg));
+
+ const char* comment = nullptr;
+
+ #ifndef ASMJIT_NO_LOGGING
+ if (hasDiagnosticOption(DiagnosticOptions::kRAAnnotate)) {
+ _tmpString.assignFormat("<LOAD> %s", workRegById(workId)->name());
+ comment = _tmpString.data();
+ }
+ #endif
+
+ return _emitHelper.emitRegMove(dstReg, srcMem, wReg->typeId(), comment);
+ }
+
+ Error X86RAPass::emitSave(uint32_t workId, uint32_t srcPhysId) noexcept {
+ RAWorkReg* wReg = workRegById(workId);
+ BaseMem dstMem(workRegAsMem(wReg));
+ BaseReg srcReg(wReg->signature(), srcPhysId);
+
+ const char* comment = nullptr;
+
+ #ifndef ASMJIT_NO_LOGGING
+ if (hasDiagnosticOption(DiagnosticOptions::kRAAnnotate)) {
+ _tmpString.assignFormat("<SAVE> %s", workRegById(workId)->name());
+ comment = _tmpString.data();
+ }
+ #endif
+
+ return _emitHelper.emitRegMove(dstMem, srcReg, wReg->typeId(), comment);
+ }
+
+ Error X86RAPass::emitJump(const Label& label) noexcept {
+ return cc()->jmp(label);
+ }
+
+ Error X86RAPass::emitPreCall(InvokeNode* invokeNode) noexcept {
+ if (invokeNode->detail().hasVarArgs() && cc()->is64Bit()) {
+ const FuncDetail& fd = invokeNode->detail();
+ uint32_t argCount = invokeNode->argCount();
+
+ switch (invokeNode->detail().callConv().id()) {
+ case CallConvId::kX64SystemV: {
+ // The AL register must contain the number of arguments passed in XMM register(s).
+ uint32_t n = 0;
+ for (uint32_t argIndex = 0; argIndex < argCount; argIndex++) {
+ const FuncValuePack& argPack = fd.argPack(argIndex);
+ for (uint32_t valueIndex = 0; valueIndex < Globals::kMaxValuePack; valueIndex++) {
+ const FuncValue& arg = argPack[valueIndex];
+ if (!arg)
+ break;
+
+ if (arg.isReg() && Reg::groupOf(arg.regType()) == RegGroup::kVec)
+ n++;
+ }
+ }
+
+ if (!n)
+ ASMJIT_PROPAGATE(cc()->xor_(eax, eax));
+ else
+ ASMJIT_PROPAGATE(cc()->mov(eax, n));
+ break;
+ }
+
+ case CallConvId::kX64Windows: {
+ // Each double-precision argument passed in an XMM register must also be passed in a GP register.
+ for (uint32_t argIndex = 0; argIndex < argCount; argIndex++) {
+ const FuncValuePack& argPack = fd.argPack(argIndex);
+ for (uint32_t valueIndex = 0; valueIndex < Globals::kMaxValuePack; valueIndex++) {
+ const FuncValue& arg = argPack[valueIndex];
+ if (!arg)
+ break;
+
+ if (arg.isReg() && Reg::groupOf(arg.regType()) == RegGroup::kVec) {
+ Gp dst = gpq(fd.callConv().passedOrder(RegGroup::kGp)[argIndex]);
+ Xmm src = xmm(arg.regId());
+ ASMJIT_PROPAGATE(cc()->emit(choose(Inst::kIdMovq, Inst::kIdVmovq), dst, src));
+ }
+ }
+ }
+ break;
+ }
+
+ default:
+ return DebugUtils::errored(kErrorInvalidState);
+ }
+ }
+
+ return kErrorOk;
+ }
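For reference, the System V branch above implements the standard x86-64 varargs protocol: AL carries the number of vector registers used to pass arguments. A hand-written equivalent using the plain Assembler (a sketch; the callee label and memory operand are placeholders):

    #include <asmjit/x86.h>
    using namespace asmjit;

    // Sketch: a SysV varargs call that passes a single double in xmm0.
    static Error callVarArgs(x86::Assembler& a, const Label& target, const x86::Mem& value) {
      ASMJIT_PROPAGATE(a.movsd(x86::xmm0, value));  // the only vector argument
      ASMJIT_PROPAGATE(a.mov(x86::eax, 1));         // AL = number of XMM registers used
      return a.call(target);
    }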
+
+ ASMJIT_END_SUB_NAMESPACE
+
+ #endif // !ASMJIT_NO_X86 && !ASMJIT_NO_COMPILER