asmjit 0.2.0 → 0.2.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (201) hide show
  1. checksums.yaml +4 -4
  2. data/Gemfile.lock +1 -1
  3. data/asmjit.gemspec +1 -1
  4. data/ext/asmjit/asmjit/.editorconfig +10 -0
  5. data/ext/asmjit/asmjit/.github/FUNDING.yml +1 -0
  6. data/ext/asmjit/asmjit/.github/workflows/build-config.json +47 -0
  7. data/ext/asmjit/asmjit/.github/workflows/build.yml +156 -0
  8. data/ext/asmjit/asmjit/.gitignore +6 -0
  9. data/ext/asmjit/asmjit/CMakeLists.txt +611 -0
  10. data/ext/asmjit/asmjit/LICENSE.md +17 -0
  11. data/ext/asmjit/asmjit/README.md +69 -0
  12. data/ext/asmjit/asmjit/src/asmjit/a64.h +62 -0
  13. data/ext/asmjit/asmjit/src/asmjit/arm/a64archtraits_p.h +81 -0
  14. data/ext/asmjit/asmjit/src/asmjit/arm/a64assembler.cpp +5115 -0
  15. data/ext/asmjit/asmjit/src/asmjit/arm/a64assembler.h +72 -0
  16. data/ext/asmjit/asmjit/src/asmjit/arm/a64builder.cpp +51 -0
  17. data/ext/asmjit/asmjit/src/asmjit/arm/a64builder.h +57 -0
  18. data/ext/asmjit/asmjit/src/asmjit/arm/a64compiler.cpp +60 -0
  19. data/ext/asmjit/asmjit/src/asmjit/arm/a64compiler.h +247 -0
  20. data/ext/asmjit/asmjit/src/asmjit/arm/a64emithelper.cpp +464 -0
  21. data/ext/asmjit/asmjit/src/asmjit/arm/a64emithelper_p.h +50 -0
  22. data/ext/asmjit/asmjit/src/asmjit/arm/a64emitter.h +1228 -0
  23. data/ext/asmjit/asmjit/src/asmjit/arm/a64formatter.cpp +298 -0
  24. data/ext/asmjit/asmjit/src/asmjit/arm/a64formatter_p.h +59 -0
  25. data/ext/asmjit/asmjit/src/asmjit/arm/a64func.cpp +189 -0
  26. data/ext/asmjit/asmjit/src/asmjit/arm/a64func_p.h +33 -0
  27. data/ext/asmjit/asmjit/src/asmjit/arm/a64globals.h +1894 -0
  28. data/ext/asmjit/asmjit/src/asmjit/arm/a64instapi.cpp +278 -0
  29. data/ext/asmjit/asmjit/src/asmjit/arm/a64instapi_p.h +41 -0
  30. data/ext/asmjit/asmjit/src/asmjit/arm/a64instdb.cpp +1957 -0
  31. data/ext/asmjit/asmjit/src/asmjit/arm/a64instdb.h +74 -0
  32. data/ext/asmjit/asmjit/src/asmjit/arm/a64instdb_p.h +876 -0
  33. data/ext/asmjit/asmjit/src/asmjit/arm/a64operand.cpp +85 -0
  34. data/ext/asmjit/asmjit/src/asmjit/arm/a64operand.h +312 -0
  35. data/ext/asmjit/asmjit/src/asmjit/arm/a64rapass.cpp +852 -0
  36. data/ext/asmjit/asmjit/src/asmjit/arm/a64rapass_p.h +105 -0
  37. data/ext/asmjit/asmjit/src/asmjit/arm/a64utils.h +179 -0
  38. data/ext/asmjit/asmjit/src/asmjit/arm/armformatter.cpp +143 -0
  39. data/ext/asmjit/asmjit/src/asmjit/arm/armformatter_p.h +44 -0
  40. data/ext/asmjit/asmjit/src/asmjit/arm/armglobals.h +21 -0
  41. data/ext/asmjit/asmjit/src/asmjit/arm/armoperand.h +621 -0
  42. data/ext/asmjit/asmjit/src/asmjit/arm.h +62 -0
  43. data/ext/asmjit/asmjit/src/asmjit/asmjit-scope-begin.h +17 -0
  44. data/ext/asmjit/asmjit/src/asmjit/asmjit-scope-end.h +9 -0
  45. data/ext/asmjit/asmjit/src/asmjit/asmjit.h +33 -0
  46. data/ext/asmjit/asmjit/src/asmjit/core/api-build_p.h +55 -0
  47. data/ext/asmjit/asmjit/src/asmjit/core/api-config.h +613 -0
  48. data/ext/asmjit/asmjit/src/asmjit/core/archcommons.h +229 -0
  49. data/ext/asmjit/asmjit/src/asmjit/core/archtraits.cpp +160 -0
  50. data/ext/asmjit/asmjit/src/asmjit/core/archtraits.h +290 -0
  51. data/ext/asmjit/asmjit/src/asmjit/core/assembler.cpp +406 -0
  52. data/ext/asmjit/asmjit/src/asmjit/core/assembler.h +129 -0
  53. data/ext/asmjit/asmjit/src/asmjit/core/builder.cpp +889 -0
  54. data/ext/asmjit/asmjit/src/asmjit/core/builder.h +1391 -0
  55. data/ext/asmjit/asmjit/src/asmjit/core/codebuffer.h +113 -0
  56. data/ext/asmjit/asmjit/src/asmjit/core/codeholder.cpp +1149 -0
  57. data/ext/asmjit/asmjit/src/asmjit/core/codeholder.h +1035 -0
  58. data/ext/asmjit/asmjit/src/asmjit/core/codewriter.cpp +175 -0
  59. data/ext/asmjit/asmjit/src/asmjit/core/codewriter_p.h +179 -0
  60. data/ext/asmjit/asmjit/src/asmjit/core/compiler.cpp +582 -0
  61. data/ext/asmjit/asmjit/src/asmjit/core/compiler.h +737 -0
  62. data/ext/asmjit/asmjit/src/asmjit/core/compilerdefs.h +173 -0
  63. data/ext/asmjit/asmjit/src/asmjit/core/constpool.cpp +363 -0
  64. data/ext/asmjit/asmjit/src/asmjit/core/constpool.h +250 -0
  65. data/ext/asmjit/asmjit/src/asmjit/core/cpuinfo.cpp +1162 -0
  66. data/ext/asmjit/asmjit/src/asmjit/core/cpuinfo.h +813 -0
  67. data/ext/asmjit/asmjit/src/asmjit/core/emithelper.cpp +323 -0
  68. data/ext/asmjit/asmjit/src/asmjit/core/emithelper_p.h +58 -0
  69. data/ext/asmjit/asmjit/src/asmjit/core/emitter.cpp +333 -0
  70. data/ext/asmjit/asmjit/src/asmjit/core/emitter.h +741 -0
  71. data/ext/asmjit/asmjit/src/asmjit/core/emitterutils.cpp +129 -0
  72. data/ext/asmjit/asmjit/src/asmjit/core/emitterutils_p.h +89 -0
  73. data/ext/asmjit/asmjit/src/asmjit/core/environment.cpp +46 -0
  74. data/ext/asmjit/asmjit/src/asmjit/core/environment.h +508 -0
  75. data/ext/asmjit/asmjit/src/asmjit/core/errorhandler.cpp +14 -0
  76. data/ext/asmjit/asmjit/src/asmjit/core/errorhandler.h +228 -0
  77. data/ext/asmjit/asmjit/src/asmjit/core/formatter.cpp +584 -0
  78. data/ext/asmjit/asmjit/src/asmjit/core/formatter.h +247 -0
  79. data/ext/asmjit/asmjit/src/asmjit/core/formatter_p.h +34 -0
  80. data/ext/asmjit/asmjit/src/asmjit/core/func.cpp +286 -0
  81. data/ext/asmjit/asmjit/src/asmjit/core/func.h +1445 -0
  82. data/ext/asmjit/asmjit/src/asmjit/core/funcargscontext.cpp +293 -0
  83. data/ext/asmjit/asmjit/src/asmjit/core/funcargscontext_p.h +199 -0
  84. data/ext/asmjit/asmjit/src/asmjit/core/globals.cpp +133 -0
  85. data/ext/asmjit/asmjit/src/asmjit/core/globals.h +393 -0
  86. data/ext/asmjit/asmjit/src/asmjit/core/inst.cpp +113 -0
  87. data/ext/asmjit/asmjit/src/asmjit/core/inst.h +772 -0
  88. data/ext/asmjit/asmjit/src/asmjit/core/jitallocator.cpp +1242 -0
  89. data/ext/asmjit/asmjit/src/asmjit/core/jitallocator.h +261 -0
  90. data/ext/asmjit/asmjit/src/asmjit/core/jitruntime.cpp +80 -0
  91. data/ext/asmjit/asmjit/src/asmjit/core/jitruntime.h +89 -0
  92. data/ext/asmjit/asmjit/src/asmjit/core/logger.cpp +69 -0
  93. data/ext/asmjit/asmjit/src/asmjit/core/logger.h +198 -0
  94. data/ext/asmjit/asmjit/src/asmjit/core/misc_p.h +33 -0
  95. data/ext/asmjit/asmjit/src/asmjit/core/operand.cpp +132 -0
  96. data/ext/asmjit/asmjit/src/asmjit/core/operand.h +1611 -0
  97. data/ext/asmjit/asmjit/src/asmjit/core/osutils.cpp +84 -0
  98. data/ext/asmjit/asmjit/src/asmjit/core/osutils.h +61 -0
  99. data/ext/asmjit/asmjit/src/asmjit/core/osutils_p.h +68 -0
  100. data/ext/asmjit/asmjit/src/asmjit/core/raassignment_p.h +418 -0
  101. data/ext/asmjit/asmjit/src/asmjit/core/rabuilders_p.h +612 -0
  102. data/ext/asmjit/asmjit/src/asmjit/core/radefs_p.h +1204 -0
  103. data/ext/asmjit/asmjit/src/asmjit/core/ralocal.cpp +1166 -0
  104. data/ext/asmjit/asmjit/src/asmjit/core/ralocal_p.h +254 -0
  105. data/ext/asmjit/asmjit/src/asmjit/core/rapass.cpp +1969 -0
  106. data/ext/asmjit/asmjit/src/asmjit/core/rapass_p.h +1183 -0
  107. data/ext/asmjit/asmjit/src/asmjit/core/rastack.cpp +184 -0
  108. data/ext/asmjit/asmjit/src/asmjit/core/rastack_p.h +171 -0
  109. data/ext/asmjit/asmjit/src/asmjit/core/string.cpp +559 -0
  110. data/ext/asmjit/asmjit/src/asmjit/core/string.h +372 -0
  111. data/ext/asmjit/asmjit/src/asmjit/core/support.cpp +494 -0
  112. data/ext/asmjit/asmjit/src/asmjit/core/support.h +1773 -0
  113. data/ext/asmjit/asmjit/src/asmjit/core/target.cpp +14 -0
  114. data/ext/asmjit/asmjit/src/asmjit/core/target.h +53 -0
  115. data/ext/asmjit/asmjit/src/asmjit/core/type.cpp +74 -0
  116. data/ext/asmjit/asmjit/src/asmjit/core/type.h +419 -0
  117. data/ext/asmjit/asmjit/src/asmjit/core/virtmem.cpp +722 -0
  118. data/ext/asmjit/asmjit/src/asmjit/core/virtmem.h +242 -0
  119. data/ext/asmjit/asmjit/src/asmjit/core/zone.cpp +353 -0
  120. data/ext/asmjit/asmjit/src/asmjit/core/zone.h +615 -0
  121. data/ext/asmjit/asmjit/src/asmjit/core/zonehash.cpp +309 -0
  122. data/ext/asmjit/asmjit/src/asmjit/core/zonehash.h +186 -0
  123. data/ext/asmjit/asmjit/src/asmjit/core/zonelist.cpp +163 -0
  124. data/ext/asmjit/asmjit/src/asmjit/core/zonelist.h +209 -0
  125. data/ext/asmjit/asmjit/src/asmjit/core/zonestack.cpp +176 -0
  126. data/ext/asmjit/asmjit/src/asmjit/core/zonestack.h +239 -0
  127. data/ext/asmjit/asmjit/src/asmjit/core/zonestring.h +120 -0
  128. data/ext/asmjit/asmjit/src/asmjit/core/zonetree.cpp +99 -0
  129. data/ext/asmjit/asmjit/src/asmjit/core/zonetree.h +380 -0
  130. data/ext/asmjit/asmjit/src/asmjit/core/zonevector.cpp +356 -0
  131. data/ext/asmjit/asmjit/src/asmjit/core/zonevector.h +690 -0
  132. data/ext/asmjit/asmjit/src/asmjit/core.h +1861 -0
  133. data/ext/asmjit/asmjit/src/asmjit/x86/x86archtraits_p.h +148 -0
  134. data/ext/asmjit/asmjit/src/asmjit/x86/x86assembler.cpp +5110 -0
  135. data/ext/asmjit/asmjit/src/asmjit/x86/x86assembler.h +685 -0
  136. data/ext/asmjit/asmjit/src/asmjit/x86/x86builder.cpp +52 -0
  137. data/ext/asmjit/asmjit/src/asmjit/x86/x86builder.h +351 -0
  138. data/ext/asmjit/asmjit/src/asmjit/x86/x86compiler.cpp +61 -0
  139. data/ext/asmjit/asmjit/src/asmjit/x86/x86compiler.h +721 -0
  140. data/ext/asmjit/asmjit/src/asmjit/x86/x86emithelper.cpp +619 -0
  141. data/ext/asmjit/asmjit/src/asmjit/x86/x86emithelper_p.h +60 -0
  142. data/ext/asmjit/asmjit/src/asmjit/x86/x86emitter.h +4315 -0
  143. data/ext/asmjit/asmjit/src/asmjit/x86/x86formatter.cpp +944 -0
  144. data/ext/asmjit/asmjit/src/asmjit/x86/x86formatter_p.h +58 -0
  145. data/ext/asmjit/asmjit/src/asmjit/x86/x86func.cpp +503 -0
  146. data/ext/asmjit/asmjit/src/asmjit/x86/x86func_p.h +33 -0
  147. data/ext/asmjit/asmjit/src/asmjit/x86/x86globals.h +2169 -0
  148. data/ext/asmjit/asmjit/src/asmjit/x86/x86instapi.cpp +1732 -0
  149. data/ext/asmjit/asmjit/src/asmjit/x86/x86instapi_p.h +41 -0
  150. data/ext/asmjit/asmjit/src/asmjit/x86/x86instdb.cpp +4427 -0
  151. data/ext/asmjit/asmjit/src/asmjit/x86/x86instdb.h +563 -0
  152. data/ext/asmjit/asmjit/src/asmjit/x86/x86instdb_p.h +311 -0
  153. data/ext/asmjit/asmjit/src/asmjit/x86/x86opcode_p.h +436 -0
  154. data/ext/asmjit/asmjit/src/asmjit/x86/x86operand.cpp +231 -0
  155. data/ext/asmjit/asmjit/src/asmjit/x86/x86operand.h +1085 -0
  156. data/ext/asmjit/asmjit/src/asmjit/x86/x86rapass.cpp +1509 -0
  157. data/ext/asmjit/asmjit/src/asmjit/x86/x86rapass_p.h +94 -0
  158. data/ext/asmjit/asmjit/src/asmjit/x86.h +93 -0
  159. data/ext/asmjit/asmjit/src/asmjit.natvis +245 -0
  160. data/ext/asmjit/asmjit/test/asmjit_test_assembler.cpp +84 -0
  161. data/ext/asmjit/asmjit/test/asmjit_test_assembler.h +85 -0
  162. data/ext/asmjit/asmjit/test/asmjit_test_assembler_a64.cpp +4006 -0
  163. data/ext/asmjit/asmjit/test/asmjit_test_assembler_x64.cpp +17833 -0
  164. data/ext/asmjit/asmjit/test/asmjit_test_assembler_x86.cpp +8300 -0
  165. data/ext/asmjit/asmjit/test/asmjit_test_compiler.cpp +253 -0
  166. data/ext/asmjit/asmjit/test/asmjit_test_compiler.h +73 -0
  167. data/ext/asmjit/asmjit/test/asmjit_test_compiler_a64.cpp +690 -0
  168. data/ext/asmjit/asmjit/test/asmjit_test_compiler_x86.cpp +4317 -0
  169. data/ext/asmjit/asmjit/test/asmjit_test_emitters.cpp +197 -0
  170. data/ext/asmjit/asmjit/test/asmjit_test_instinfo.cpp +181 -0
  171. data/ext/asmjit/asmjit/test/asmjit_test_misc.h +257 -0
  172. data/ext/asmjit/asmjit/test/asmjit_test_perf.cpp +62 -0
  173. data/ext/asmjit/asmjit/test/asmjit_test_perf.h +61 -0
  174. data/ext/asmjit/asmjit/test/asmjit_test_perf_a64.cpp +699 -0
  175. data/ext/asmjit/asmjit/test/asmjit_test_perf_x86.cpp +5032 -0
  176. data/ext/asmjit/asmjit/test/asmjit_test_unit.cpp +172 -0
  177. data/ext/asmjit/asmjit/test/asmjit_test_x86_sections.cpp +172 -0
  178. data/ext/asmjit/asmjit/test/asmjitutils.h +38 -0
  179. data/ext/asmjit/asmjit/test/broken.cpp +312 -0
  180. data/ext/asmjit/asmjit/test/broken.h +148 -0
  181. data/ext/asmjit/asmjit/test/cmdline.h +61 -0
  182. data/ext/asmjit/asmjit/test/performancetimer.h +41 -0
  183. data/ext/asmjit/asmjit/tools/configure-makefiles.sh +13 -0
  184. data/ext/asmjit/asmjit/tools/configure-ninja.sh +13 -0
  185. data/ext/asmjit/asmjit/tools/configure-sanitizers.sh +13 -0
  186. data/ext/asmjit/asmjit/tools/configure-vs2019-x64.bat +2 -0
  187. data/ext/asmjit/asmjit/tools/configure-vs2019-x86.bat +2 -0
  188. data/ext/asmjit/asmjit/tools/configure-vs2022-x64.bat +2 -0
  189. data/ext/asmjit/asmjit/tools/configure-vs2022-x86.bat +2 -0
  190. data/ext/asmjit/asmjit/tools/configure-xcode.sh +8 -0
  191. data/ext/asmjit/asmjit/tools/enumgen.js +417 -0
  192. data/ext/asmjit/asmjit/tools/enumgen.sh +3 -0
  193. data/ext/asmjit/asmjit/tools/tablegen-arm.js +365 -0
  194. data/ext/asmjit/asmjit/tools/tablegen-arm.sh +3 -0
  195. data/ext/asmjit/asmjit/tools/tablegen-x86.js +2638 -0
  196. data/ext/asmjit/asmjit/tools/tablegen-x86.sh +3 -0
  197. data/ext/asmjit/asmjit/tools/tablegen.js +947 -0
  198. data/ext/asmjit/asmjit/tools/tablegen.sh +4 -0
  199. data/ext/asmjit/asmjit.cc +18 -0
  200. data/lib/asmjit/version.rb +1 -1
  201. metadata +197 -2
@@ -0,0 +1,685 @@
1
+ // This file is part of AsmJit project <https://asmjit.com>
2
+ //
3
+ // See asmjit.h or LICENSE.md for license and copyright information
4
+ // SPDX-License-Identifier: Zlib
5
+
6
+ #ifndef ASMJIT_X86_X86ASSEMBLER_H_INCLUDED
7
+ #define ASMJIT_X86_X86ASSEMBLER_H_INCLUDED
8
+
9
+ #include "../core/assembler.h"
10
+ #include "../x86/x86emitter.h"
11
+ #include "../x86/x86operand.h"
12
+
13
+ ASMJIT_BEGIN_SUB_NAMESPACE(x86)
14
+
15
+ //! \addtogroup asmjit_x86
16
+ //! \{
17
+
18
+ //! X86/X64 assembler implementation.
19
+ //!
20
+ //! x86::Assembler is a code emitter that emits machine code directly into the \ref CodeBuffer. The assembler is capable
21
+ //! of targeting both 32-bit and 64-bit instruction sets, the instruction set can be configured through \ref CodeHolder.
22
+ //!
23
+ //! ### Basics
24
+ //!
25
+ //! The following example shows a basic use of `x86::Assembler`, how to generate a function that works in both 32-bit
26
+ //! and 64-bit modes, and how to connect \ref JitRuntime, \ref CodeHolder, and `x86::Assembler`.
27
+ //!
28
+ //! ```
29
+ //! #include <asmjit/x86.h>
30
+ //! #include <stdio.h>
31
+ //!
32
+ //! using namespace asmjit;
33
+ //!
34
+ //! // Signature of the generated function.
35
+ //! typedef int (*SumFunc)(const int* arr, size_t count);
36
+ //!
37
+ //! int main() {
38
+ //! JitRuntime rt; // Create a runtime specialized for JIT.
39
+ //! CodeHolder code; // Create a CodeHolder.
40
+ //!
41
+ //! code.init(rt.environment()); // Initialize code to match the JIT environment.
42
+ //! x86::Assembler a(&code); // Create and attach x86::Assembler to code.
43
+ //!
44
+ //! // Decide between 32-bit CDECL, WIN64, and SysV64 calling conventions:
45
+ //! // 32-BIT - passes all arguments by stack.
46
+ //! // WIN64 - passes first 4 arguments by RCX, RDX, R8, and R9.
47
+ //! // UNIX64 - passes first 6 arguments by RDI, RSI, RDX, RCX, R8, and R9.
48
+ //! x86::Gp arr, cnt;
49
+ //! x86::Gp sum = x86::eax; // Use EAX as 'sum' as it's a return register.
50
+ //!
51
+ //! if (ASMJIT_ARCH_BITS == 64) {
52
+ //! #if defined(_WIN32)
53
+ //! arr = x86::rcx; // First argument (array ptr).
54
+ //! cnt = x86::rdx; // Second argument (number of elements)
55
+ //! #else
56
+ //! arr = x86::rdi; // First argument (array ptr).
57
+ //! cnt = x86::rsi; // Second argument (number of elements)
58
+ //! #endif
59
+ //! }
60
+ //! else {
61
+ //! arr = x86::edx; // Use EDX to hold the array pointer.
62
+ //! cnt = x86::ecx; // Use ECX to hold the counter.
63
+ //! // Fetch first and second arguments from [ESP + 4] and [ESP + 8].
64
+ //! a.mov(arr, x86::ptr(x86::esp, 4));
65
+ //! a.mov(cnt, x86::ptr(x86::esp, 8));
66
+ //! }
67
+ //!
68
+ //! Label Loop = a.newLabel(); // To construct the loop, we need some labels.
69
+ //! Label Exit = a.newLabel();
70
+ //!
71
+ //! a.xor_(sum, sum); // Clear 'sum' register (shorter than 'mov').
72
+ //! a.test(cnt, cnt); // Border case:
73
+ //! a.jz(Exit); // If 'cnt' is zero jump to 'Exit' now.
74
+ //!
75
+ //! a.bind(Loop); // Start of a loop iteration.
76
+ //! a.add(sum, x86::dword_ptr(arr)); // Add int at [arr] to 'sum'.
77
+ //! a.add(arr, 4); // Increment 'arr' pointer.
78
+ //! a.dec(cnt); // Decrease 'cnt'.
79
+ //! a.jnz(Loop); // If not zero jump to 'Loop'.
80
+ //!
81
+ //! a.bind(Exit); // Exit to handle the border case.
82
+ //! a.ret(); // Return from function ('sum' == 'eax').
83
+ //! // ----> x86::Assembler is no longer needed from here and can be destroyed <----
84
+ //!
85
+ //! SumFunc fn;
86
+ //! Error err = rt.add(&fn, &code); // Add the generated code to the runtime.
87
+ //!
88
+ //! if (err) return 1; // Handle a possible error returned by AsmJit.
89
+ //! // ----> CodeHolder is no longer needed from here and can be destroyed <----
90
+ //!
91
+ //! static const int array[6] = { 4, 8, 15, 16, 23, 42 };
92
+ //!
93
+ //! int result = fn(array, 6); // Execute the generated code.
94
+ //! printf("%d\n", result); // Print sum of array (108).
95
+ //!
96
+ //! rt.release(fn); // Explicitly remove the function from the runtime
97
+ //! return 0; // Everything successful...
98
+ //! }
99
+ //! ```
100
+ //!
101
+ //! The example should be self-explanatory. It shows how to work with labels, how to use operands, and how to emit
102
+ //! instructions that can use different registers based on runtime selection. It implements 32-bit CDECL, WIN64,
103
+ //! and SysV64 calling conventions and will work on most X86/X64 environments.
104
+ //!
105
+ //! Although function prologs / epilogs can be implemented manually, AsmJit provides utilities that can be used
106
+ //! to create function prologs and epilogs automatically, see \ref asmjit_function for more details.
107
+ //!
108
+ //! ### Instruction Validation
109
+ //!
110
+ //! Assembler prefers speed over strictness by default. The implementation checks the type of operands and fails
111
+ //! if the signature of types is invalid, however, it does only basic checks regarding registers and their groups
112
+ //! used in instructions. It's possible to pass operands that don't form any valid signature to the implementation
113
+ //! and succeed. This is usually not a problem as Assembler provides typed API so operand types are normally checked
114
+ //! by C++ compiler at compile time, however, Assembler is fully dynamic and its \ref emit() function can be called
115
+ //! with any instruction id, options, and operands. Moreover, it's also possible to form instructions that will be
116
+ //! accepted by the typed API, for example by calling `mov(x86::eax, x86::al)` - the C++ compiler won't see a problem
117
+ //! as both EAX and AL are \ref Gp registers.
118
+ //!
119
+ //! To help with common mistakes AsmJit allows to activate instruction validation. This feature instruments
120
+ //! the Assembler to call \ref InstAPI::validate() before it attempts to encode any instruction.
121
+ //!
122
+ //! The example below illustrates how validation can be turned on:
123
+ //!
124
+ //! ```
125
+ //! #include <asmjit/x86.h>
126
+ //! #include <stdio.h>
127
+ //!
128
+ //! using namespace asmjit;
129
+ //!
130
+ //! int main(int argc, char* argv[]) {
131
+ //! JitRuntime rt; // Create a runtime specialized for JIT.
132
+ //! CodeHolder code; // Create a CodeHolder.
133
+ //!
134
+ //! code.init(rt.environment()); // Initialize code to match the JIT environment.
135
+ //! x86::Assembler a(&code); // Create and attach x86::Assembler to code.
136
+ //!
137
+ //! // Enable strict validation.
138
+ //! a.addDiagnosticOptions(DiagnosticOptions::kValidateAssembler);
139
+ //!
140
+ //! // Try to encode invalid or ill-formed instructions.
141
+ //! Error err;
142
+ //!
143
+ //! // Invalid instruction.
144
+ //! err = a.mov(x86::eax, x86::al);
145
+ //! printf("Status: %s\n", DebugUtils::errorAsString(err));
146
+ //!
147
+ //! // Invalid instruction.
148
+ //! err = a.emit(x86::Inst::kIdMovss, x86::eax, x86::xmm0);
149
+ //! printf("Status: %s\n", DebugUtils::errorAsString(err));
150
+ //!
151
+ //! // Ambiguous operand size - the pointer requires size.
152
+ //! err = a.inc(x86::ptr(x86::rax));
153
+ //! printf("Status: %s\n", DebugUtils::errorAsString(err));
154
+ //!
155
+ //! return 0;
156
+ //! }
157
+ //! ```
158
+ //!
159
+ //! ### Native Registers
160
+ //!
161
+ //! All emitters provide functions to construct machine-size registers depending on the target. This feature is
162
+ //! for users that want to write code targeting both 32-bit and 64-bit architectures at the same time. In AsmJit
163
+ //! terminology such registers have prefix `z`, so for example on X86 architecture the following native registers
164
+ //! are provided:
165
+ //!
166
+ //! - `zax` - mapped to either `eax` or `rax`
167
+ //! - `zbx` - mapped to either `ebx` or `rbx`
168
+ //! - `zcx` - mapped to either `ecx` or `rcx`
169
+ //! - `zdx` - mapped to either `edx` or `rdx`
170
+ //! - `zsp` - mapped to either `esp` or `rsp`
171
+ //! - `zbp` - mapped to either `ebp` or `rbp`
172
+ //! - `zsi` - mapped to either `esi` or `rsi`
173
+ //! - `zdi` - mapped to either `edi` or `rdi`
174
+ //!
175
+ //! They are accessible through \ref x86::Assembler, \ref x86::Builder, and \ref x86::Compiler. The example below
176
+ //! illustrates how to use this feature:
177
+ //!
178
+ //! ```
179
+ //! #include <asmjit/x86.h>
180
+ //! #include <stdio.h>
181
+ //!
182
+ //! using namespace asmjit;
183
+ //!
184
+ //! typedef int (*Func)(void);
185
+ //!
186
+ //! int main(int argc, char* argv[]) {
187
+ //! JitRuntime rt; // Create a runtime specialized for JIT.
188
+ //! CodeHolder code; // Create a CodeHolder.
189
+ //!
190
+ //! code.init(rt.environment()); // Initialize code to match the JIT environment.
191
+ //! x86::Assembler a(&code); // Create and attach x86::Assembler to code.
192
+ //!
193
+ //! // Let's get these registers from x86::Assembler.
194
+ //! x86::Gp zbp = a.zbp();
195
+ //! x86::Gp zsp = a.zsp();
196
+ //!
197
+ //! int stackSize = 32;
198
+ //!
199
+ //! // Function prolog.
200
+ //! a.push(zbp);
201
+ //! a.mov(zbp, zsp);
202
+ //! a.sub(zsp, stackSize);
203
+ //!
204
+ //! // ... emit some code (this just sets return value to zero) ...
205
+ //! a.xor_(x86::eax, x86::eax);
206
+ //!
207
+ //! // Function epilog and return.
208
+ //! a.mov(zsp, zbp);
209
+ //! a.pop(zbp);
210
+ //! a.ret();
211
+ //!
212
+ //! // To make the example complete let's call it.
213
+ //! Func fn;
214
+ //! Error err = rt.add(&fn, &code); // Add the generated code to the runtime.
215
+ //! if (err) return 1; // Handle a possible error returned by AsmJit.
216
+ //!
217
+ //! int result = fn(); // Execute the generated code.
218
+ //! printf("%d\n", result); // Print the resulting "0".
219
+ //!
220
+ //! rt.release(fn); // Remove the function from the runtime.
221
+ //! return 0;
222
+ //! }
223
+ //! ```
224
+ //!
225
+ //! The example just returns `0`, but the function generated contains a standard prolog and epilog sequence and the
226
+ //! function itself reserves 32 bytes of local stack. The advantage is clear - a single code-base can handle multiple
227
+ //! targets easily. If you want to create a register of native size dynamically by specifying its id it's also possible:
228
+ //!
229
+ //! ```
230
+ //! void example(x86::Assembler& a) {
231
+ //! x86::Gp zax = a.gpz(x86::Gp::kIdAx);
232
+ //! x86::Gp zbx = a.gpz(x86::Gp::kIdBx);
233
+ //! x86::Gp zcx = a.gpz(x86::Gp::kIdCx);
234
+ //! x86::Gp zdx = a.gpz(x86::Gp::kIdDx);
235
+ //!
236
+ //! // You can also change register's id easily.
237
+ //! x86::Gp zsp = zax;
238
+ //! zsp.setId(4); // or x86::Gp::kIdSp.
239
+ //! }
240
+ //! ```
241
+ //!
242
+ //! ### Data Embedding
243
+ //!
244
+ //! x86::Assembler extends the standard \ref BaseAssembler with X86/X64 specific conventions that are often used by
245
+ //! assemblers to embed data next to the code. The following functions can be used to embed data:
246
+ //!
247
+ //! - \ref BaseAssembler::embedInt8() - embeds int8_t (portable naming).
248
+ //! - \ref BaseAssembler::embedUInt8() - embeds uint8_t (portable naming).
249
+ //! - \ref BaseAssembler::embedInt16() - embeds int16_t (portable naming).
250
+ //! - \ref BaseAssembler::embedUInt16() - embeds uint16_t (portable naming).
251
+ //! - \ref BaseAssembler::embedInt32() - embeds int32_t (portable naming).
252
+ //! - \ref BaseAssembler::embedUInt32() - embeds uint32_t (portable naming).
253
+ //! - \ref BaseAssembler::embedInt64() - embeds int64_t (portable naming).
254
+ //! - \ref BaseAssembler::embedUInt64() - embeds uint64_t (portable naming).
255
+ //! - \ref BaseAssembler::embedFloat() - embeds float (portable naming).
256
+ //! - \ref BaseAssembler::embedDouble() - embeds double (portable naming).
257
+ //!
258
+ //! - \ref x86::Assembler::db() - embeds byte (8 bits) (x86 naming).
259
+ //! - \ref x86::Assembler::dw() - embeds word (16 bits) (x86 naming).
260
+ //! - \ref x86::Assembler::dd() - embeds dword (32 bits) (x86 naming).
261
+ //! - \ref x86::Assembler::dq() - embeds qword (64 bits) (x86 naming).
262
+ //!
263
+ //! The following example illustrates how embed works:
264
+ //!
265
+ //! ```
266
+ //! #include <asmjit/x86.h>
267
+ //! using namespace asmjit;
268
+ //!
269
+ //! void embedData(x86::Assembler& a) {
270
+ //! a.db(0xFF); // Embeds 0xFF byte.
271
+ //! a.dw(0xFF00); // Embeds 0xFF00 word (little-endian).
272
+ //! a.dd(0xFF000000); // Embeds 0xFF000000 dword (little-endian).
273
+ //! a.embedFloat(0.4f); // Embeds 0.4f (32-bit float, little-endian).
274
+ //! }
275
+ //! ```
276
+ //!
277
+ //! Sometimes it's required to read the data that is embedded after code, for example. This can be done through
278
+ //! \ref Label as shown below:
279
+ //!
280
+ //! ```
281
+ //! #include <asmjit/x86.h>
282
+ //! using namespace asmjit;
283
+ //!
284
+ //! void embedData(x86::Assembler& a, const Label& L_Data) {
285
+ //! x86::Gp addr = a.zax(); // EAX or RAX.
286
+ //! x86::Gp val = x86::edi; // Where to store some value...
287
+ //!
288
+ //! // Approach 1 - Load the address to register through LEA. This approach
289
+ //! // is flexible as the address can be then manipulated, for
290
+ //! // example if you have a data array, which would need index.
291
+ //! a.lea(addr, L_Data); // Loads the address of the label to EAX or RAX.
292
+ //! a.mov(val, x86::dword_ptr(addr));
293
+ //!
294
+ //! // Approach 2 - Load the data directly by using L_Data in address. It's
295
+ //! // worth noting that this doesn't work with indexes in X64
296
+ //! // mode. It will use absolute address in 32-bit mode and
297
+ //! // relative address (RIP) in 64-bit mode.
298
+ //! a.mov(val, x86::dword_ptr(L_Data));
299
+ //! }
300
+ //! ```
301
+ //!
302
+ //! ### Label Embedding
303
+ //!
304
+ //! It's also possible to embed labels. In general AsmJit provides the following options:
305
+ //!
306
+ //! - \ref BaseEmitter::embedLabel() - Embeds absolute address of a label. This is target dependent and would
307
+ //! embed either 32-bit or 64-bit data that embeds absolute label address. This kind of embedding cannot be
308
+ //! used in a position independent code.
309
+ //!
310
+ //! - \ref BaseEmitter::embedLabelDelta() - Embeds a difference between two labels. The size of the difference
311
+ //! can be specified so it's possible to embed 8-bit, 16-bit, 32-bit, and 64-bit difference, which is sufficient
312
+ //! for most purposes.
313
+ //!
314
+ //! The following example demonstrates how to embed labels and their differences:
315
+ //!
316
+ //! ```
317
+ //! #include <asmjit/x86.h>
318
+ //! using namespace asmjit;
319
+ //!
320
+ //! void embedLabel(x86::Assembler& a, const Label& L_Data) {
321
+ //! // [1] Embed L_Data - the size of the data will be dependent on the target.
322
+ //! a.embedLabel(L_Data);
323
+ //!
324
+ //! // [2] Embed a 32-bit difference of two labels.
325
+ //! Label L_Here = a.newLabel();
326
+ //! a.bind(L_Here);
327
+ //! // Embeds int32_t(L_Data - L_Here).
328
+ //! a.embedLabelDelta(L_Data, L_Here, 4);
329
+ //! }
330
+ //! ```
331
+ //!
332
+ //! ### Using FuncFrame and FuncDetail with x86::Assembler
333
+ //!
334
+ //! The example below demonstrates how \ref FuncFrame and \ref FuncDetail can be used together with \ref x86::Assembler
335
+ //! to generate a function that will use platform dependent calling conventions automatically depending on the target:
336
+ //!
337
+ //! ```
338
+ //! #include <asmjit/x86.h>
339
+ //! #include <stdio.h>
340
+ //!
341
+ //! using namespace asmjit;
342
+ //!
343
+ //! typedef void (*SumIntsFunc)(int* dst, const int* a, const int* b);
344
+ //!
345
+ //! int main(int argc, char* argv[]) {
346
+ //! JitRuntime rt; // Create JIT Runtime.
347
+ //! CodeHolder code; // Create a CodeHolder.
348
+ //!
349
+ //! code.init(rt.environment()); // Initialize code to match the JIT environment.
350
+ //! x86::Assembler a(&code); // Create and attach x86::Assembler to code.
351
+ //!
352
+ //! // Decide which registers will be mapped to function arguments. Try changing
353
+ //! // registers of dst, src_a, and src_b and see what happens in function's
354
+ //! // prolog and epilog.
355
+ //! x86::Gp dst = a.zax();
356
+ //! x86::Gp src_a = a.zcx();
357
+ //! x86::Gp src_b = a.zdx();
358
+ //!
359
+ //! x86::Xmm vec0 = x86::xmm0;
360
+ //! x86::Xmm vec1 = x86::xmm1;
361
+ //!
362
+ //! // Create/initialize FuncDetail and FuncFrame.
363
+ //! FuncDetail func;
364
+ //! func.init(FuncSignatureT<void, int*, const int*, const int*>(CallConvId::kHost));
365
+ //!
366
+ //! FuncFrame frame;
367
+ //! frame.init(func);
368
+ //!
369
+ //! // Make XMM0 and XMM1 dirty - RegGroup::kVec describes XMM|YMM|ZMM registers.
370
+ //! frame.setDirtyRegs(RegGroup::kVec, Support::bitMask(0, 1));
371
+ //!
372
+ //! // Alternatively, if you don't want to use register masks you can pass BaseReg
373
+ //! // to addDirtyRegs(). The following code would add both xmm0 and xmm1.
374
+ //! frame.addDirtyRegs(x86::xmm0, x86::xmm1);
375
+ //!
376
+ //! FuncArgsAssignment args(&func); // Create arguments assignment context.
377
+ //! args.assignAll(dst, src_a, src_b);// Assign our registers to arguments.
378
+ //! args.updateFrameInfo(frame); // Reflect our args in FuncFrame.
379
+ //! frame.finalize(); // Finalize the FuncFrame (updates it).
380
+ //!
381
+ //! a.emitProlog(frame); // Emit function prolog.
382
+ //! a.emitArgsAssignment(frame, args);// Assign arguments to registers.
383
+ //! a.movdqu(vec0, x86::ptr(src_a)); // Load 4 ints from [src_a] to XMM0.
384
+ //! a.movdqu(vec1, x86::ptr(src_b)); // Load 4 ints from [src_b] to XMM1.
385
+ //! a.paddd(vec0, vec1); // Add 4 ints in XMM1 to XMM0.
386
+ //! a.movdqu(x86::ptr(dst), vec0); // Store the result to [dst].
387
+ //! a.emitEpilog(frame); // Emit function epilog and return.
388
+ //!
389
+ //! SumIntsFunc fn;
390
+ //! Error err = rt.add(&fn, &code); // Add the generated code to the runtime.
391
+ //! if (err) return 1; // Handle a possible error case.
392
+ //!
393
+ //! // Execute the generated function.
394
+ //! int inA[4] = { 4, 3, 2, 1 };
395
+ //! int inB[4] = { 1, 5, 2, 8 };
396
+ //! int out[4];
397
+ //! fn(out, inA, inB);
398
+ //!
399
+ //! // Prints {5 8 4 9}
400
+ //! printf("{%d %d %d %d}\n", out[0], out[1], out[2], out[3]);
401
+ //!
402
+ //! rt.release(fn);
403
+ //! return 0;
404
+ //! }
405
+ //! ```
406
+ //!
407
+ //! ### Using x86::Assembler as Code-Patcher
408
+ //!
409
+ //! This is an advanced topic that is sometimes unavoidable. AsmJit by default appends machine code it generates
410
+ //! into a \ref CodeBuffer, however, it also allows to set the offset in \ref CodeBuffer explicitly and to overwrite
411
+ //! its content. This technique is extremely dangerous as X86 instructions have variable length (see below), so you
412
+ //! should in general only patch code to change instruction's immediate values or some other details not known
413
+ //! at the time the instruction was emitted. A typical scenario that requires code-patching is when you start emitting
414
+ //! function and you don't know how much stack you want to reserve for it.
415
+ //!
416
+ //! Before we go further it's important to introduce instruction options, because they can help with code-patching
417
+ //! (and not only patching, but that will be explained in AVX-512 section):
418
+ //!
419
+ //! - Many general-purpose instructions (especially arithmetic ones) on X86 have multiple encodings - in AsmJit
420
+ //! this is usually called 'short form' and 'long form'.
421
+ //!
422
+ //! - AsmJit always tries to use 'short form' as it makes the resulting machine-code smaller, which is always
423
+ //! good - this decision is used by majority of assemblers out there.
424
+ //!
425
+ //! - AsmJit allows to override the default decision by using `short_()` and `long_()` instruction options to force
426
+ //! short or long form, respectively. The most useful is `long_()` as it basically forces AsmJit to always emit
427
+ //! the longest form. The `short_()` is not that useful as it's automatic (except jumps to non-bound labels). Note
428
+ //! that the underscore after each function name avoids collision with built-in C++ types.
429
+ //!
430
+ //! To illustrate what short form and long form means in binary let's assume we want to emit "add esp, 16" instruction,
431
+ //! which has two possible binary encodings:
432
+ //!
433
+ //! - `83C410` - This is a short form aka `short add esp, 16` - You can see opcode byte (0x83), Mod/RM byte (0xC4)
434
+ //! and an 8-bit immediate value representing `16`.
435
+ //!
436
+ //! - `81C410000000` - This is a long form aka `long add esp, 16` - You can see a different opcode byte (0x81), the
437
+ //! same Mod/RM byte (0xC4) and a 32-bit immediate in little-endian representing `16`.
438
+ //!
439
+ //! It should be obvious that patching an existing instruction into an instruction having a different size may create
440
+ //! various problems. So it's recommended to be careful and to only patch instructions into instructions having the
441
+ //! same size. The example below demonstrates how instruction options can be used to guarantee the size of an
442
+ //! instruction by forcing the assembler to use long-form encoding:
443
+ //!
444
+ //! ```
445
+ //! #include <asmjit/x86.h>
446
+ //! #include <stdio.h>
447
+ //!
448
+ //! using namespace asmjit;
449
+ //!
450
+ //! typedef int (*Func)(void);
451
+ //!
452
+ //! int main(int argc, char* argv[]) {
453
+ //! JitRuntime rt; // Create a runtime specialized for JIT.
454
+ //! CodeHolder code; // Create a CodeHolder.
455
+ //!
456
+ //! code.init(rt.environment()); // Initialize code to match the JIT environment.
457
+ //! x86::Assembler a(&code); // Create and attach x86::Assembler to code.
458
+ //!
459
+ //! // Let's get these registers from x86::Assembler.
460
+ //! x86::Gp zbp = a.zbp();
461
+ //! x86::Gp zsp = a.zsp();
462
+ //!
463
+ //! // Function prolog.
464
+ //! a.push(zbp);
465
+ //! a.mov(zbp, zsp);
466
+ //!
467
+ //! // This is where we are gonna patch the code later, so let's get the offset
468
+ //! // (the current location) from the beginning of the code-buffer.
469
+ //! size_t patchOffset = a.offset();
470
+ //! // Let's just emit 'sub zsp, 0' for now, but don't forget to use LONG form.
471
+ //! a.long_().sub(zsp, 0);
472
+ //!
473
+ //! // ... emit some code (this just sets return value to zero) ...
474
+ //! a.xor_(x86::eax, x86::eax);
475
+ //!
476
+ //! // Function epilog and return.
477
+ //! a.mov(zsp, zbp);
478
+ //! a.pop(zbp);
479
+ //! a.ret();
480
+ //!
481
+ //! // Now we know how much stack size we want to reserve. I have chosen 128
482
+ //! // bytes on purpose as it's encodable only in long form that we have used.
483
+ //!
484
+ //! int stackSize = 128; // Number of bytes to reserve on the stack.
485
+ //! a.setOffset(patchOffset); // Move the current cursor to `patchOffset`.
486
+ //! a.long_().sub(zsp, stackSize); // Patch the code; don't forget to use LONG form.
487
+ //!
488
+ //! // Now the code is ready to be called
489
+ //! Func fn;
490
+ //! Error err = rt.add(&fn, &code); // Add the generated code to the runtime.
491
+ //! if (err) return 1; // Handle a possible error returned by AsmJit.
492
+ //!
493
+ //! int result = fn(); // Execute the generated code.
494
+ //! printf("%d\n", result); // Print the resulting "0".
495
+ //!
496
+ //! rt.release(fn); // Remove the function from the runtime.
497
+ //! return 0;
498
+ //! }
499
+ //! ```
500
+ //!
501
+ //! If you run the example it will just work, because both instructions have the same size. As an experiment you can
502
+ //! try removing `long_()` form to see what happens when wrong code is generated.
503
+ //!
504
+ //! ### Code Patching and REX Prefix
505
+ //!
506
+ //! In 64-bit mode there is one more thing to worry about when patching code: REX prefix. It's a single byte prefix
507
+ //! designed to address registers with ids from 8 to 15 and to override the default width of operation from 32 to 64
508
+ //! bits. AsmJit, like other assemblers, only emits REX prefix when it's necessary. If the patched code only changes
509
+ //! the immediate value as shown in the previous example then there is nothing to worry about as it doesn't change
510
+ //! the logic behind emitting REX prefix, however, if the patched code changes register id or overrides the operation
511
+ //! width then it's important to take care of REX prefix as well.
512
+ //!
513
+ //! AsmJit contains another instruction option that controls (forces) REX prefix - `rex()`. If you use it the
514
+ //! instruction emitted will always use REX prefix even when it's encodable without it. The following list contains
515
+ //! some instructions and their binary representations to illustrate when it's emitted:
516
+ //!
517
+ //! - `__83C410` - `add esp, 16` - 32-bit operation in 64-bit mode doesn't require REX prefix.
518
+ //! - `4083C410` - `rex add esp, 16` - 32-bit operation in 64-bit mode with forced REX prefix (0x40).
519
+ //! - `4883C410` - `add rsp, 16` - 64-bit operation in 64-bit mode requires REX prefix (0x48).
520
+ //! - `4183C410` - `add r12d, 16` - 32-bit operation in 64-bit mode using R12D requires REX prefix (0x41).
521
+ //! - `4983C410` - `add r12, 16` - 64-bit operation in 64-bit mode using R12 requires REX prefix (0x49).
522
+ //!
523
+ //! ### More Prefixes
524
+ //!
525
+ //! X86 architecture is known for its prefixes. AsmJit supports all prefixes
526
+ //! that can affect how the instruction is encoded:
527
+ //!
528
+ //! ```
529
+ //! #include <asmjit/x86.h>
530
+ //!
531
+ //! using namespace asmjit;
532
+ //!
533
+ //! void prefixesExample(x86::Assembler& a) {
534
+ //! // Lock prefix for implementing atomics:
535
+ //! // lock add dword ptr [dst], 1
536
+ //! a.lock().add(x86::dword_ptr(dst), 1);
537
+ //!
538
+ //! // Similarly, XAcquire/XRelease prefixes are also available:
539
+ //! // xacquire add dword ptr [dst], 1
540
+ //! a.xacquire().add(x86::dword_ptr(dst), 1);
541
+ //!
542
+ //! // Rep prefix (see also repe/repz and repne/repnz):
543
+ //! // rep movs byte ptr [dst], byte ptr [src]
544
+ //! a.rep().movs(x86::byte_ptr(dst), x86::byte_ptr(src));
545
+ //!
546
+ //! // Forcing REX prefix in 64-bit mode.
547
+ //! // rex mov eax, 1
548
+ //! a.rex().mov(x86::eax, 1);
549
+ //!
550
+ //! // AVX instruction without forced prefix uses the shortest encoding:
551
+ //! // vaddpd xmm0, xmm1, xmm2 -> [C5|F1|58|C2]
552
+ //! a.vaddpd(x86::xmm0, x86::xmm1, x86::xmm2);
553
+ //!
554
+ //! // Forcing VEX3 prefix (AVX):
555
+ //! // vex3 vaddpd xmm0, xmm1, xmm2 -> [C4|E1|71|58|C2]
556
+ //! a.vex3().vaddpd(x86::xmm0, x86::xmm1, x86::xmm2);
557
+ //!
558
+ //! // Forcing EVEX prefix (AVX512):
559
+ //! // evex vaddpd xmm0, xmm1, xmm2 -> [62|F1|F5|08|58|C2]
560
+ //! a.evex().vaddpd(x86::xmm0, x86::xmm1, x86::xmm2);
561
+ //!
562
+ //! // Some instructions accept prefixes not originally intended to:
563
+ //! // rep ret
564
+ //! a.rep().ret();
565
+ //! }
566
+ //! ```
567
+ //!
568
+ //! It's important to understand that prefixes are part of instruction options. When a member function that involves
569
+ //! adding a prefix is called the prefix is combined with existing instruction options, which will affect the next
570
+ //! instruction generated.
571
+ //!
572
+ //! ### Generating AVX512 Code
573
+ //!
574
+ //! x86::Assembler can generate AVX512+ code including the use of opmask registers. Opmask can be specified through
575
+ //! \ref x86::Assembler::k() function, which stores it as an extra register, which will be used by the next
576
+ //! instruction. AsmJit uses such concept for manipulating instruction options as well.
577
+ //!
578
+ //! The following AVX512 features are supported:
579
+ //!
580
+ //! - Opmask selector {k} and zeroing {z}.
581
+ //! - Rounding modes {rn|rd|ru|rz} and suppress-all-exceptions {sae} option.
582
+ //! - AVX512 broadcasts {1toN}.
583
+ //!
584
+ //! The following example demonstrates how AVX512 features can be used:
585
+ //!
586
+ //! ```
587
+ //! #include <asmjit/x86.h>
588
+ //!
589
+ //! using namespace asmjit;
590
+ //!
591
+ //! void generateAVX512Code(x86::Assembler& a) {
592
+ //! using namespace x86;
593
+ //!
594
+ //! // Opmask Selectors
595
+ //! // ----------------
596
+ //! //
597
+ //! // - Opmask / zeroing is part of the instruction options / extraReg.
598
+ //! // - k(reg) is like {kreg} in Intel syntax.
599
+ //! // - z() is like {z} in Intel syntax.
600
+ //!
601
+ //! // vaddpd zmm0 {k1} {z}, zmm1, zmm2
602
+ //! a.k(k1).z().vaddpd(zmm0, zmm1, zmm2);
603
+ //!
604
+ //! // Memory Broadcasts
605
+ //! // -----------------
606
+ //! //
607
+ //! // - Broadcast data is part of memory operand.
608
+ //! // - Use x86::Mem::_1toN(), which returns a new x86::Mem operand.
609
+ //!
610
+ //! // vaddpd zmm0 {k1} {z}, zmm1, [rcx] {1to8}
611
+ //! a.k(k1).z().vaddpd(zmm0, zmm1, x86::mem(rcx)._1to8());
612
+ //!
613
+ //! // Embedded Rounding & Suppress-All-Exceptions
614
+ //! // -------------------------------------------
615
+ //! //
616
+ //! // - Rounding mode and {sae} are part of instruction options.
617
+ //! // - Use sae() to enable exception suppression.
618
+ //! // - Use rn_sae(), rd_sae(), ru_sae(), and rz_sae() - to enable rounding.
619
+ //! // - Embedded rounding implicitly sets {sae} as well, that's why the API
620
+ //! // names also carry the _sae() suffix, to make it clear.
621
+ //!
622
+ //! // vcmppd k1, zmm1, zmm2, 0x00 {sae}
623
+ //! a.sae().vcmppd(k1, zmm1, zmm2, 0);
624
+ //!
625
+ //! // vaddpd zmm0, zmm1, zmm2 {rz}
626
+ //! a.rz_sae().vaddpd(zmm0, zmm1, zmm2);
627
+ //! }
628
+ //! ```
629
+ class ASMJIT_VIRTAPI Assembler
630
+ : public BaseAssembler,
631
+ public EmitterImplicitT<Assembler> {
632
+ public:
633
+ ASMJIT_NONCOPYABLE(Assembler)
634
+ typedef BaseAssembler Base;
635
+
636
+ //! \name Construction & Destruction
637
+ //! \{
638
+
639
+ ASMJIT_API explicit Assembler(CodeHolder* code = nullptr) noexcept; //!< Creates the assembler; `code` is optional and defaults to `nullptr`.
640
+ ASMJIT_API virtual ~Assembler() noexcept;
641
+
642
+ //! \}
643
+
644
+ //! \cond INTERNAL
645
+ //! \name Internal
646
+ //! \{
647
+
648
+ // NOTE: x86::Assembler uses _privateData to store 'address-override' bit that is used to decide whether to emit
649
+ // address-override (67H) prefix based on the memory BASE+INDEX registers. It's either `kX86MemInfo_67H_X86` or
650
+ // `kX86MemInfo_67H_X64`.
651
+ inline uint32_t _addressOverrideMask() const noexcept { return _privateData; } //!< Returns the address-override mask kept in `_privateData`.
652
+ inline void _setAddressOverrideMask(uint32_t m) noexcept { _privateData = m; } //!< Stores `m` as the address-override mask in `_privateData`.
653
+
654
+ //! \}
655
+ //! \endcond
656
+ //! \cond INTERNAL
657
+ //! \name Emit
658
+ //! \{
659
+
660
+ ASMJIT_API Error _emit(InstId instId, const Operand_& o0, const Operand_& o1, const Operand_& o2, const Operand_* opExt) override;
661
+
662
+ //! \}
663
+ //! \endcond
664
+
665
+ //! \name Align
666
+ //! \{
667
+
668
+ ASMJIT_API Error align(AlignMode alignMode, uint32_t alignment) override;
669
+
670
+ //! \}
671
+
672
+ //! \name Events
673
+ //! \{
674
+
675
+ ASMJIT_API Error onAttach(CodeHolder* code) noexcept override;
676
+ ASMJIT_API Error onDetach(CodeHolder* code) noexcept override;
677
+
678
+ //! \}
679
+ };
680
+
681
+ //! \}
682
+
683
+ ASMJIT_END_SUB_NAMESPACE
684
+
685
+ #endif // ASMJIT_X86_X86ASSEMBLER_H_INCLUDED