asmjit 0.2.0 → 0.2.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Gemfile.lock +1 -1
- data/asmjit.gemspec +1 -1
- data/ext/asmjit/asmjit/.editorconfig +10 -0
- data/ext/asmjit/asmjit/.github/FUNDING.yml +1 -0
- data/ext/asmjit/asmjit/.github/workflows/build-config.json +47 -0
- data/ext/asmjit/asmjit/.github/workflows/build.yml +156 -0
- data/ext/asmjit/asmjit/.gitignore +6 -0
- data/ext/asmjit/asmjit/CMakeLists.txt +611 -0
- data/ext/asmjit/asmjit/LICENSE.md +17 -0
- data/ext/asmjit/asmjit/README.md +69 -0
- data/ext/asmjit/asmjit/src/asmjit/a64.h +62 -0
- data/ext/asmjit/asmjit/src/asmjit/arm/a64archtraits_p.h +81 -0
- data/ext/asmjit/asmjit/src/asmjit/arm/a64assembler.cpp +5115 -0
- data/ext/asmjit/asmjit/src/asmjit/arm/a64assembler.h +72 -0
- data/ext/asmjit/asmjit/src/asmjit/arm/a64builder.cpp +51 -0
- data/ext/asmjit/asmjit/src/asmjit/arm/a64builder.h +57 -0
- data/ext/asmjit/asmjit/src/asmjit/arm/a64compiler.cpp +60 -0
- data/ext/asmjit/asmjit/src/asmjit/arm/a64compiler.h +247 -0
- data/ext/asmjit/asmjit/src/asmjit/arm/a64emithelper.cpp +464 -0
- data/ext/asmjit/asmjit/src/asmjit/arm/a64emithelper_p.h +50 -0
- data/ext/asmjit/asmjit/src/asmjit/arm/a64emitter.h +1228 -0
- data/ext/asmjit/asmjit/src/asmjit/arm/a64formatter.cpp +298 -0
- data/ext/asmjit/asmjit/src/asmjit/arm/a64formatter_p.h +59 -0
- data/ext/asmjit/asmjit/src/asmjit/arm/a64func.cpp +189 -0
- data/ext/asmjit/asmjit/src/asmjit/arm/a64func_p.h +33 -0
- data/ext/asmjit/asmjit/src/asmjit/arm/a64globals.h +1894 -0
- data/ext/asmjit/asmjit/src/asmjit/arm/a64instapi.cpp +278 -0
- data/ext/asmjit/asmjit/src/asmjit/arm/a64instapi_p.h +41 -0
- data/ext/asmjit/asmjit/src/asmjit/arm/a64instdb.cpp +1957 -0
- data/ext/asmjit/asmjit/src/asmjit/arm/a64instdb.h +74 -0
- data/ext/asmjit/asmjit/src/asmjit/arm/a64instdb_p.h +876 -0
- data/ext/asmjit/asmjit/src/asmjit/arm/a64operand.cpp +85 -0
- data/ext/asmjit/asmjit/src/asmjit/arm/a64operand.h +312 -0
- data/ext/asmjit/asmjit/src/asmjit/arm/a64rapass.cpp +852 -0
- data/ext/asmjit/asmjit/src/asmjit/arm/a64rapass_p.h +105 -0
- data/ext/asmjit/asmjit/src/asmjit/arm/a64utils.h +179 -0
- data/ext/asmjit/asmjit/src/asmjit/arm/armformatter.cpp +143 -0
- data/ext/asmjit/asmjit/src/asmjit/arm/armformatter_p.h +44 -0
- data/ext/asmjit/asmjit/src/asmjit/arm/armglobals.h +21 -0
- data/ext/asmjit/asmjit/src/asmjit/arm/armoperand.h +621 -0
- data/ext/asmjit/asmjit/src/asmjit/arm.h +62 -0
- data/ext/asmjit/asmjit/src/asmjit/asmjit-scope-begin.h +17 -0
- data/ext/asmjit/asmjit/src/asmjit/asmjit-scope-end.h +9 -0
- data/ext/asmjit/asmjit/src/asmjit/asmjit.h +33 -0
- data/ext/asmjit/asmjit/src/asmjit/core/api-build_p.h +55 -0
- data/ext/asmjit/asmjit/src/asmjit/core/api-config.h +613 -0
- data/ext/asmjit/asmjit/src/asmjit/core/archcommons.h +229 -0
- data/ext/asmjit/asmjit/src/asmjit/core/archtraits.cpp +160 -0
- data/ext/asmjit/asmjit/src/asmjit/core/archtraits.h +290 -0
- data/ext/asmjit/asmjit/src/asmjit/core/assembler.cpp +406 -0
- data/ext/asmjit/asmjit/src/asmjit/core/assembler.h +129 -0
- data/ext/asmjit/asmjit/src/asmjit/core/builder.cpp +889 -0
- data/ext/asmjit/asmjit/src/asmjit/core/builder.h +1391 -0
- data/ext/asmjit/asmjit/src/asmjit/core/codebuffer.h +113 -0
- data/ext/asmjit/asmjit/src/asmjit/core/codeholder.cpp +1149 -0
- data/ext/asmjit/asmjit/src/asmjit/core/codeholder.h +1035 -0
- data/ext/asmjit/asmjit/src/asmjit/core/codewriter.cpp +175 -0
- data/ext/asmjit/asmjit/src/asmjit/core/codewriter_p.h +179 -0
- data/ext/asmjit/asmjit/src/asmjit/core/compiler.cpp +582 -0
- data/ext/asmjit/asmjit/src/asmjit/core/compiler.h +737 -0
- data/ext/asmjit/asmjit/src/asmjit/core/compilerdefs.h +173 -0
- data/ext/asmjit/asmjit/src/asmjit/core/constpool.cpp +363 -0
- data/ext/asmjit/asmjit/src/asmjit/core/constpool.h +250 -0
- data/ext/asmjit/asmjit/src/asmjit/core/cpuinfo.cpp +1162 -0
- data/ext/asmjit/asmjit/src/asmjit/core/cpuinfo.h +813 -0
- data/ext/asmjit/asmjit/src/asmjit/core/emithelper.cpp +323 -0
- data/ext/asmjit/asmjit/src/asmjit/core/emithelper_p.h +58 -0
- data/ext/asmjit/asmjit/src/asmjit/core/emitter.cpp +333 -0
- data/ext/asmjit/asmjit/src/asmjit/core/emitter.h +741 -0
- data/ext/asmjit/asmjit/src/asmjit/core/emitterutils.cpp +129 -0
- data/ext/asmjit/asmjit/src/asmjit/core/emitterutils_p.h +89 -0
- data/ext/asmjit/asmjit/src/asmjit/core/environment.cpp +46 -0
- data/ext/asmjit/asmjit/src/asmjit/core/environment.h +508 -0
- data/ext/asmjit/asmjit/src/asmjit/core/errorhandler.cpp +14 -0
- data/ext/asmjit/asmjit/src/asmjit/core/errorhandler.h +228 -0
- data/ext/asmjit/asmjit/src/asmjit/core/formatter.cpp +584 -0
- data/ext/asmjit/asmjit/src/asmjit/core/formatter.h +247 -0
- data/ext/asmjit/asmjit/src/asmjit/core/formatter_p.h +34 -0
- data/ext/asmjit/asmjit/src/asmjit/core/func.cpp +286 -0
- data/ext/asmjit/asmjit/src/asmjit/core/func.h +1445 -0
- data/ext/asmjit/asmjit/src/asmjit/core/funcargscontext.cpp +293 -0
- data/ext/asmjit/asmjit/src/asmjit/core/funcargscontext_p.h +199 -0
- data/ext/asmjit/asmjit/src/asmjit/core/globals.cpp +133 -0
- data/ext/asmjit/asmjit/src/asmjit/core/globals.h +393 -0
- data/ext/asmjit/asmjit/src/asmjit/core/inst.cpp +113 -0
- data/ext/asmjit/asmjit/src/asmjit/core/inst.h +772 -0
- data/ext/asmjit/asmjit/src/asmjit/core/jitallocator.cpp +1242 -0
- data/ext/asmjit/asmjit/src/asmjit/core/jitallocator.h +261 -0
- data/ext/asmjit/asmjit/src/asmjit/core/jitruntime.cpp +80 -0
- data/ext/asmjit/asmjit/src/asmjit/core/jitruntime.h +89 -0
- data/ext/asmjit/asmjit/src/asmjit/core/logger.cpp +69 -0
- data/ext/asmjit/asmjit/src/asmjit/core/logger.h +198 -0
- data/ext/asmjit/asmjit/src/asmjit/core/misc_p.h +33 -0
- data/ext/asmjit/asmjit/src/asmjit/core/operand.cpp +132 -0
- data/ext/asmjit/asmjit/src/asmjit/core/operand.h +1611 -0
- data/ext/asmjit/asmjit/src/asmjit/core/osutils.cpp +84 -0
- data/ext/asmjit/asmjit/src/asmjit/core/osutils.h +61 -0
- data/ext/asmjit/asmjit/src/asmjit/core/osutils_p.h +68 -0
- data/ext/asmjit/asmjit/src/asmjit/core/raassignment_p.h +418 -0
- data/ext/asmjit/asmjit/src/asmjit/core/rabuilders_p.h +612 -0
- data/ext/asmjit/asmjit/src/asmjit/core/radefs_p.h +1204 -0
- data/ext/asmjit/asmjit/src/asmjit/core/ralocal.cpp +1166 -0
- data/ext/asmjit/asmjit/src/asmjit/core/ralocal_p.h +254 -0
- data/ext/asmjit/asmjit/src/asmjit/core/rapass.cpp +1969 -0
- data/ext/asmjit/asmjit/src/asmjit/core/rapass_p.h +1183 -0
- data/ext/asmjit/asmjit/src/asmjit/core/rastack.cpp +184 -0
- data/ext/asmjit/asmjit/src/asmjit/core/rastack_p.h +171 -0
- data/ext/asmjit/asmjit/src/asmjit/core/string.cpp +559 -0
- data/ext/asmjit/asmjit/src/asmjit/core/string.h +372 -0
- data/ext/asmjit/asmjit/src/asmjit/core/support.cpp +494 -0
- data/ext/asmjit/asmjit/src/asmjit/core/support.h +1773 -0
- data/ext/asmjit/asmjit/src/asmjit/core/target.cpp +14 -0
- data/ext/asmjit/asmjit/src/asmjit/core/target.h +53 -0
- data/ext/asmjit/asmjit/src/asmjit/core/type.cpp +74 -0
- data/ext/asmjit/asmjit/src/asmjit/core/type.h +419 -0
- data/ext/asmjit/asmjit/src/asmjit/core/virtmem.cpp +722 -0
- data/ext/asmjit/asmjit/src/asmjit/core/virtmem.h +242 -0
- data/ext/asmjit/asmjit/src/asmjit/core/zone.cpp +353 -0
- data/ext/asmjit/asmjit/src/asmjit/core/zone.h +615 -0
- data/ext/asmjit/asmjit/src/asmjit/core/zonehash.cpp +309 -0
- data/ext/asmjit/asmjit/src/asmjit/core/zonehash.h +186 -0
- data/ext/asmjit/asmjit/src/asmjit/core/zonelist.cpp +163 -0
- data/ext/asmjit/asmjit/src/asmjit/core/zonelist.h +209 -0
- data/ext/asmjit/asmjit/src/asmjit/core/zonestack.cpp +176 -0
- data/ext/asmjit/asmjit/src/asmjit/core/zonestack.h +239 -0
- data/ext/asmjit/asmjit/src/asmjit/core/zonestring.h +120 -0
- data/ext/asmjit/asmjit/src/asmjit/core/zonetree.cpp +99 -0
- data/ext/asmjit/asmjit/src/asmjit/core/zonetree.h +380 -0
- data/ext/asmjit/asmjit/src/asmjit/core/zonevector.cpp +356 -0
- data/ext/asmjit/asmjit/src/asmjit/core/zonevector.h +690 -0
- data/ext/asmjit/asmjit/src/asmjit/core.h +1861 -0
- data/ext/asmjit/asmjit/src/asmjit/x86/x86archtraits_p.h +148 -0
- data/ext/asmjit/asmjit/src/asmjit/x86/x86assembler.cpp +5110 -0
- data/ext/asmjit/asmjit/src/asmjit/x86/x86assembler.h +685 -0
- data/ext/asmjit/asmjit/src/asmjit/x86/x86builder.cpp +52 -0
- data/ext/asmjit/asmjit/src/asmjit/x86/x86builder.h +351 -0
- data/ext/asmjit/asmjit/src/asmjit/x86/x86compiler.cpp +61 -0
- data/ext/asmjit/asmjit/src/asmjit/x86/x86compiler.h +721 -0
- data/ext/asmjit/asmjit/src/asmjit/x86/x86emithelper.cpp +619 -0
- data/ext/asmjit/asmjit/src/asmjit/x86/x86emithelper_p.h +60 -0
- data/ext/asmjit/asmjit/src/asmjit/x86/x86emitter.h +4315 -0
- data/ext/asmjit/asmjit/src/asmjit/x86/x86formatter.cpp +944 -0
- data/ext/asmjit/asmjit/src/asmjit/x86/x86formatter_p.h +58 -0
- data/ext/asmjit/asmjit/src/asmjit/x86/x86func.cpp +503 -0
- data/ext/asmjit/asmjit/src/asmjit/x86/x86func_p.h +33 -0
- data/ext/asmjit/asmjit/src/asmjit/x86/x86globals.h +2169 -0
- data/ext/asmjit/asmjit/src/asmjit/x86/x86instapi.cpp +1732 -0
- data/ext/asmjit/asmjit/src/asmjit/x86/x86instapi_p.h +41 -0
- data/ext/asmjit/asmjit/src/asmjit/x86/x86instdb.cpp +4427 -0
- data/ext/asmjit/asmjit/src/asmjit/x86/x86instdb.h +563 -0
- data/ext/asmjit/asmjit/src/asmjit/x86/x86instdb_p.h +311 -0
- data/ext/asmjit/asmjit/src/asmjit/x86/x86opcode_p.h +436 -0
- data/ext/asmjit/asmjit/src/asmjit/x86/x86operand.cpp +231 -0
- data/ext/asmjit/asmjit/src/asmjit/x86/x86operand.h +1085 -0
- data/ext/asmjit/asmjit/src/asmjit/x86/x86rapass.cpp +1509 -0
- data/ext/asmjit/asmjit/src/asmjit/x86/x86rapass_p.h +94 -0
- data/ext/asmjit/asmjit/src/asmjit/x86.h +93 -0
- data/ext/asmjit/asmjit/src/asmjit.natvis +245 -0
- data/ext/asmjit/asmjit/test/asmjit_test_assembler.cpp +84 -0
- data/ext/asmjit/asmjit/test/asmjit_test_assembler.h +85 -0
- data/ext/asmjit/asmjit/test/asmjit_test_assembler_a64.cpp +4006 -0
- data/ext/asmjit/asmjit/test/asmjit_test_assembler_x64.cpp +17833 -0
- data/ext/asmjit/asmjit/test/asmjit_test_assembler_x86.cpp +8300 -0
- data/ext/asmjit/asmjit/test/asmjit_test_compiler.cpp +253 -0
- data/ext/asmjit/asmjit/test/asmjit_test_compiler.h +73 -0
- data/ext/asmjit/asmjit/test/asmjit_test_compiler_a64.cpp +690 -0
- data/ext/asmjit/asmjit/test/asmjit_test_compiler_x86.cpp +4317 -0
- data/ext/asmjit/asmjit/test/asmjit_test_emitters.cpp +197 -0
- data/ext/asmjit/asmjit/test/asmjit_test_instinfo.cpp +181 -0
- data/ext/asmjit/asmjit/test/asmjit_test_misc.h +257 -0
- data/ext/asmjit/asmjit/test/asmjit_test_perf.cpp +62 -0
- data/ext/asmjit/asmjit/test/asmjit_test_perf.h +61 -0
- data/ext/asmjit/asmjit/test/asmjit_test_perf_a64.cpp +699 -0
- data/ext/asmjit/asmjit/test/asmjit_test_perf_x86.cpp +5032 -0
- data/ext/asmjit/asmjit/test/asmjit_test_unit.cpp +172 -0
- data/ext/asmjit/asmjit/test/asmjit_test_x86_sections.cpp +172 -0
- data/ext/asmjit/asmjit/test/asmjitutils.h +38 -0
- data/ext/asmjit/asmjit/test/broken.cpp +312 -0
- data/ext/asmjit/asmjit/test/broken.h +148 -0
- data/ext/asmjit/asmjit/test/cmdline.h +61 -0
- data/ext/asmjit/asmjit/test/performancetimer.h +41 -0
- data/ext/asmjit/asmjit/tools/configure-makefiles.sh +13 -0
- data/ext/asmjit/asmjit/tools/configure-ninja.sh +13 -0
- data/ext/asmjit/asmjit/tools/configure-sanitizers.sh +13 -0
- data/ext/asmjit/asmjit/tools/configure-vs2019-x64.bat +2 -0
- data/ext/asmjit/asmjit/tools/configure-vs2019-x86.bat +2 -0
- data/ext/asmjit/asmjit/tools/configure-vs2022-x64.bat +2 -0
- data/ext/asmjit/asmjit/tools/configure-vs2022-x86.bat +2 -0
- data/ext/asmjit/asmjit/tools/configure-xcode.sh +8 -0
- data/ext/asmjit/asmjit/tools/enumgen.js +417 -0
- data/ext/asmjit/asmjit/tools/enumgen.sh +3 -0
- data/ext/asmjit/asmjit/tools/tablegen-arm.js +365 -0
- data/ext/asmjit/asmjit/tools/tablegen-arm.sh +3 -0
- data/ext/asmjit/asmjit/tools/tablegen-x86.js +2638 -0
- data/ext/asmjit/asmjit/tools/tablegen-x86.sh +3 -0
- data/ext/asmjit/asmjit/tools/tablegen.js +947 -0
- data/ext/asmjit/asmjit/tools/tablegen.sh +4 -0
- data/ext/asmjit/asmjit.cc +18 -0
- data/lib/asmjit/version.rb +1 -1
- metadata +197 -2
@@ -0,0 +1,685 @@
|
|
1
|
+
// This file is part of AsmJit project <https://asmjit.com>
|
2
|
+
//
|
3
|
+
// See asmjit.h or LICENSE.md for license and copyright information
|
4
|
+
// SPDX-License-Identifier: Zlib
|
5
|
+
|
6
|
+
#ifndef ASMJIT_X86_X86ASSEMBLER_H_INCLUDED
|
7
|
+
#define ASMJIT_X86_X86ASSEMBLER_H_INCLUDED
|
8
|
+
|
9
|
+
#include "../core/assembler.h"
|
10
|
+
#include "../x86/x86emitter.h"
|
11
|
+
#include "../x86/x86operand.h"
|
12
|
+
|
13
|
+
ASMJIT_BEGIN_SUB_NAMESPACE(x86)
|
14
|
+
|
15
|
+
//! \addtogroup asmjit_x86
|
16
|
+
//! \{
|
17
|
+
|
18
|
+
//! X86/X64 assembler implementation.
|
19
|
+
//!
|
20
|
+
//! x86::Assembler is a code emitter that emits machine code directly into the \ref CodeBuffer. The assembler is capable
|
21
|
+
//! of targeting both 32-bit and 64-bit instruction sets, the instruction set can be configured through \ref CodeHolder.
|
22
|
+
//!
|
23
|
+
//! ### Basics
|
24
|
+
//!
|
25
|
+
//! The following example shows a basic use of `x86::Assembler`, how to generate a function that works in both 32-bit
|
26
|
+
//! and 64-bit modes, and how to connect \ref JitRuntime, \ref CodeHolder, and `x86::Assembler`.
|
27
|
+
//!
|
28
|
+
//! ```
|
29
|
+
//! #include <asmjit/x86.h>
|
30
|
+
//! #include <stdio.h>
|
31
|
+
//!
|
32
|
+
//! using namespace asmjit;
|
33
|
+
//!
|
34
|
+
//! // Signature of the generated function.
|
35
|
+
//! typedef int (*SumFunc)(const int* arr, size_t count);
|
36
|
+
//!
|
37
|
+
//! int main() {
|
38
|
+
//! JitRuntime rt; // Create a runtime specialized for JIT.
|
39
|
+
//! CodeHolder code; // Create a CodeHolder.
|
40
|
+
//!
|
41
|
+
//! code.init(rt.environment()); // Initialize code to match the JIT environment.
|
42
|
+
//! x86::Assembler a(&code); // Create and attach x86::Assembler to code.
|
43
|
+
//!
|
44
|
+
//! // Decide between 32-bit CDECL, WIN64, and SysV64 calling conventions:
|
45
|
+
//! // 32-BIT - passes all arguments by stack.
|
46
|
+
//! // WIN64 - passes first 4 arguments by RCX, RDX, R8, and R9.
|
47
|
+
//! // UNIX64 - passes first 6 arguments by RDI, RSI, RDX, RCX, R8, and R9.
|
48
|
+
//! x86::Gp arr, cnt;
|
49
|
+
//! x86::Gp sum = x86::eax; // Use EAX as 'sum' as it's a return register.
|
50
|
+
//!
|
51
|
+
//! if (ASMJIT_ARCH_BITS == 64) {
|
52
|
+
//! #if defined(_WIN32)
|
53
|
+
//! arr = x86::rcx; // First argument (array ptr).
|
54
|
+
//! cnt = x86::rdx; // Second argument (number of elements)
|
55
|
+
//! #else
|
56
|
+
//! arr = x86::rdi; // First argument (array ptr).
|
57
|
+
//! cnt = x86::rsi; // Second argument (number of elements)
|
58
|
+
//! #endif
|
59
|
+
//! }
|
60
|
+
//! else {
|
61
|
+
//! arr = x86::edx; // Use EDX to hold the array pointer.
|
62
|
+
//! cnt = x86::ecx; // Use ECX to hold the counter.
|
63
|
+
//! // Fetch first and second arguments from [ESP + 4] and [ESP + 8].
|
64
|
+
//! a.mov(arr, x86::ptr(x86::esp, 4));
|
65
|
+
//! a.mov(cnt, x86::ptr(x86::esp, 8));
|
66
|
+
//! }
|
67
|
+
//!
|
68
|
+
//! Label Loop = a.newLabel(); // To construct the loop, we need some labels.
|
69
|
+
//! Label Exit = a.newLabel();
|
70
|
+
//!
|
71
|
+
//! a.xor_(sum, sum); // Clear 'sum' register (shorter than 'mov').
|
72
|
+
//! a.test(cnt, cnt); // Border case:
|
73
|
+
//! a.jz(Exit); // If 'cnt' is zero jump to 'Exit' now.
|
74
|
+
//!
|
75
|
+
//! a.bind(Loop); // Start of a loop iteration.
|
76
|
+
//! a.add(sum, x86::dword_ptr(arr)); // Add int at [arr] to 'sum'.
|
77
|
+
//! a.add(arr, 4); // Increment 'arr' pointer.
|
78
|
+
//! a.dec(cnt); // Decrease 'cnt'.
|
79
|
+
//! a.jnz(Loop); // If not zero jump to 'Loop'.
|
80
|
+
//!
|
81
|
+
//! a.bind(Exit); // Exit to handle the border case.
|
82
|
+
//! a.ret(); // Return from function ('sum' == 'eax').
|
83
|
+
//! // ----> x86::Assembler is no longer needed from here and can be destroyed <----
|
84
|
+
//!
|
85
|
+
//! SumFunc fn;
|
86
|
+
//! Error err = rt.add(&fn, &code); // Add the generated code to the runtime.
|
87
|
+
//!
|
88
|
+
//! if (err) return 1; // Handle a possible error returned by AsmJit.
|
89
|
+
//! // ----> CodeHolder is no longer needed from here and can be destroyed <----
|
90
|
+
//!
|
91
|
+
//! static const int array[6] = { 4, 8, 15, 16, 23, 42 };
|
92
|
+
//!
|
93
|
+
//! int result = fn(array, 6); // Execute the generated code.
|
94
|
+
//! printf("%d\n", result); // Print sum of array (108).
|
95
|
+
//!
|
96
|
+
//! rt.release(fn); // Explicitly remove the function from the runtime
|
97
|
+
//! return 0; // Everything successful...
|
98
|
+
//! }
|
99
|
+
//! ```
|
100
|
+
//!
|
101
|
+
//! The example should be self-explanatory. It shows how to work with labels, how to use operands, and how to emit
|
102
|
+
//! instructions that can use different registers based on runtime selection. It implements 32-bit CDECL, WIN64,
|
103
|
+
//! and SysV64 calling conventions and will work on most X86/X64 environments.
|
104
|
+
//!
|
105
|
+
//! Although function prologs / epilogs can be implemented manually, AsmJit provides utilities that can be used
|
106
|
+
//! to create function prologs and epilogs automatically, see \ref asmjit_function for more details.
|
107
|
+
//!
|
108
|
+
//! ### Instruction Validation
|
109
|
+
//!
|
110
|
+
//! Assembler prefers speed over strictness by default. The implementation checks the type of operands and fails
|
111
|
+
//! if the signature of types is invalid, however, it does only basic checks regarding registers and their groups
|
112
|
+
//! used in instructions. It's possible to pass operands that don't form any valid signature to the implementation
|
113
|
+
//! and succeed. This is usually not a problem as Assembler provides typed API so operand types are normally checked
|
114
|
+
//! by C++ compiler at compile time, however, Assembler is fully dynamic and its \ref emit() function can be called
|
115
|
+
//! with any instruction id, options, and operands. Moreover, it's also possible to form invalid instructions that will be
|
116
|
+
//! accepted by the typed API, for example by calling `mov(x86::eax, x86::al)` - the C++ compiler won't see a problem
|
117
|
+
//! as both EAX and AL are \ref Gp registers.
|
118
|
+
//!
|
119
|
+
//! To help with common mistakes AsmJit allows to activate instruction validation. This feature instruments
|
120
|
+
//! the Assembler to call \ref InstAPI::validate() before it attempts to encode any instruction.
|
121
|
+
//!
|
122
|
+
//! The example below illustrates how validation can be turned on:
|
123
|
+
//!
|
124
|
+
//! ```
|
125
|
+
//! #include <asmjit/x86.h>
|
126
|
+
//! #include <stdio.h>
|
127
|
+
//!
|
128
|
+
//! using namespace asmjit;
|
129
|
+
//!
|
130
|
+
//! int main(int argc, char* argv[]) {
|
131
|
+
//! JitRuntime rt; // Create a runtime specialized for JIT.
|
132
|
+
//! CodeHolder code; // Create a CodeHolder.
|
133
|
+
//!
|
134
|
+
//! code.init(rt.environment()); // Initialize code to match the JIT environment.
|
135
|
+
//! x86::Assembler a(&code); // Create and attach x86::Assembler to code.
|
136
|
+
//!
|
137
|
+
//! // Enable strict validation.
|
138
|
+
//! a.addDiagnosticOptions(DiagnosticOptions::kValidateAssembler);
|
139
|
+
//!
|
140
|
+
//! // Try to encode invalid or ill-formed instructions.
|
141
|
+
//! Error err;
|
142
|
+
//!
|
143
|
+
//! // Invalid instruction.
|
144
|
+
//! err = a.mov(x86::eax, x86::al);
|
145
|
+
//! printf("Status: %s\n", DebugUtils::errorAsString(err));
|
146
|
+
//!
|
147
|
+
//! // Invalid instruction.
|
148
|
+
//! err = a.emit(x86::Inst::kIdMovss, x86::eax, x86::xmm0);
|
149
|
+
//! printf("Status: %s\n", DebugUtils::errorAsString(err));
|
150
|
+
//!
|
151
|
+
//! // Ambiguous operand size - the pointer requires size.
|
152
|
+
//! err = a.inc(x86::ptr(x86::rax));
|
153
|
+
//! printf("Status: %s\n", DebugUtils::errorAsString(err));
|
154
|
+
//!
|
155
|
+
//! return 0;
|
156
|
+
//! }
|
157
|
+
//! ```
|
158
|
+
//!
|
159
|
+
//! ### Native Registers
|
160
|
+
//!
|
161
|
+
//! All emitters provide functions to construct machine-size registers depending on the target. This feature is
|
162
|
+
//! for users that want to write code targeting both 32-bit and 64-bit architectures at the same time. In AsmJit
|
163
|
+
//! terminology such registers have prefix `z`, so for example on X86 architecture the following native registers
|
164
|
+
//! are provided:
|
165
|
+
//!
|
166
|
+
//! - `zax` - mapped to either `eax` or `rax`
|
167
|
+
//! - `zbx` - mapped to either `ebx` or `rbx`
|
168
|
+
//! - `zcx` - mapped to either `ecx` or `rcx`
|
169
|
+
//! - `zdx` - mapped to either `edx` or `rdx`
|
170
|
+
//! - `zsp` - mapped to either `esp` or `rsp`
|
171
|
+
//! - `zbp` - mapped to either `ebp` or `rbp`
|
172
|
+
//! - `zsi` - mapped to either `esi` or `rsi`
|
173
|
+
//! - `zdi` - mapped to either `edi` or `rdi`
|
174
|
+
//!
|
175
|
+
//! They are accessible through \ref x86::Assembler, \ref x86::Builder, and \ref x86::Compiler. The example below
|
176
|
+
//! illustrates how to use this feature:
|
177
|
+
//!
|
178
|
+
//! ```
|
179
|
+
//! #include <asmjit/x86.h>
|
180
|
+
//! #include <stdio.h>
|
181
|
+
//!
|
182
|
+
//! using namespace asmjit;
|
183
|
+
//!
|
184
|
+
//! typedef int (*Func)(void);
|
185
|
+
//!
|
186
|
+
//! int main(int argc, char* argv[]) {
|
187
|
+
//! JitRuntime rt; // Create a runtime specialized for JIT.
|
188
|
+
//! CodeHolder code; // Create a CodeHolder.
|
189
|
+
//!
|
190
|
+
//! code.init(rt.environment()); // Initialize code to match the JIT environment.
|
191
|
+
//! x86::Assembler a(&code); // Create and attach x86::Assembler to code.
|
192
|
+
//!
|
193
|
+
//! // Let's get these registers from x86::Assembler.
|
194
|
+
//! x86::Gp zbp = a.zbp();
|
195
|
+
//! x86::Gp zsp = a.zsp();
|
196
|
+
//!
|
197
|
+
//! int stackSize = 32;
|
198
|
+
//!
|
199
|
+
//! // Function prolog.
|
200
|
+
//! a.push(zbp);
|
201
|
+
//! a.mov(zbp, zsp);
|
202
|
+
//! a.sub(zsp, stackSize);
|
203
|
+
//!
|
204
|
+
//! // ... emit some code (this just sets return value to zero) ...
|
205
|
+
//! a.xor_(x86::eax, x86::eax);
|
206
|
+
//!
|
207
|
+
//! // Function epilog and return.
|
208
|
+
//! a.mov(zsp, zbp);
|
209
|
+
//! a.pop(zbp);
|
210
|
+
//! a.ret();
|
211
|
+
//!
|
212
|
+
//! // To make the example complete let's call it.
|
213
|
+
//! Func fn;
|
214
|
+
//! Error err = rt.add(&fn, &code); // Add the generated code to the runtime.
|
215
|
+
//! if (err) return 1; // Handle a possible error returned by AsmJit.
|
216
|
+
//!
|
217
|
+
//! int result = fn(); // Execute the generated code.
|
218
|
+
//! printf("%d\n", result); // Print the resulting "0".
|
219
|
+
//!
|
220
|
+
//! rt.release(fn); // Remove the function from the runtime.
|
221
|
+
//! return 0;
|
222
|
+
//! }
|
223
|
+
//! ```
|
224
|
+
//!
|
225
|
+
//! The example just returns `0`, but the function generated contains a standard prolog and epilog sequence and the
|
226
|
+
//! function itself reserves 32 bytes of local stack. The advantage is clear - a single code-base can handle multiple
|
227
|
+
//! targets easily. If you want to create a register of native size dynamically by specifying its id it's also possible:
|
228
|
+
//!
|
229
|
+
//! ```
|
230
|
+
//! void example(x86::Assembler& a) {
|
231
|
+
//! x86::Gp zax = a.gpz(x86::Gp::kIdAx);
|
232
|
+
//! x86::Gp zbx = a.gpz(x86::Gp::kIdBx);
|
233
|
+
//! x86::Gp zcx = a.gpz(x86::Gp::kIdCx);
|
234
|
+
//! x86::Gp zdx = a.gpz(x86::Gp::kIdDx);
|
235
|
+
//!
|
236
|
+
//! // You can also change register's id easily.
|
237
|
+
//! x86::Gp zsp = zax;
|
238
|
+
//! zsp.setId(4); // or x86::Gp::kIdSp.
|
239
|
+
//! }
|
240
|
+
//! ```
|
241
|
+
//!
|
242
|
+
//! ### Data Embedding
|
243
|
+
//!
|
244
|
+
//! x86::Assembler extends the standard \ref BaseAssembler with X86/X64 specific conventions that are often used by
|
245
|
+
//! assemblers to embed data next to the code. The following functions can be used to embed data:
|
246
|
+
//!
|
247
|
+
//! - \ref BaseAssembler::embedInt8() - embeds int8_t (portable naming).
|
248
|
+
//! - \ref BaseAssembler::embedUInt8() - embeds uint8_t (portable naming).
|
249
|
+
//! - \ref BaseAssembler::embedInt16() - embeds int16_t (portable naming).
|
250
|
+
//! - \ref BaseAssembler::embedUInt16() - embeds uint16_t (portable naming).
|
251
|
+
//! - \ref BaseAssembler::embedInt32() - embeds int32_t (portable naming).
|
252
|
+
//! - \ref BaseAssembler::embedUInt32() - embeds uint32_t (portable naming).
|
253
|
+
//! - \ref BaseAssembler::embedInt64() - embeds int64_t (portable naming).
|
254
|
+
//! - \ref BaseAssembler::embedUInt64() - embeds uint64_t (portable naming).
|
255
|
+
//! - \ref BaseAssembler::embedFloat() - embeds float (portable naming).
|
256
|
+
//! - \ref BaseAssembler::embedDouble() - embeds double (portable naming).
|
257
|
+
//!
|
258
|
+
//! - \ref x86::Assembler::db() - embeds byte (8 bits) (x86 naming).
|
259
|
+
//! - \ref x86::Assembler::dw() - embeds word (16 bits) (x86 naming).
|
260
|
+
//! - \ref x86::Assembler::dd() - embeds dword (32 bits) (x86 naming).
|
261
|
+
//! - \ref x86::Assembler::dq() - embeds qword (64 bits) (x86 naming).
|
262
|
+
//!
|
263
|
+
//! The following example illustrates how embed works:
|
264
|
+
//!
|
265
|
+
//! ```
|
266
|
+
//! #include <asmjit/x86.h>
|
267
|
+
//! using namespace asmjit;
|
268
|
+
//!
|
269
|
+
//! void embedData(x86::Assembler& a) {
|
270
|
+
//! a.db(0xFF); // Embeds 0xFF byte.
|
271
|
+
//! a.dw(0xFF00); // Embeds 0xFF00 word (little-endian).
|
272
|
+
//! a.dd(0xFF000000); // Embeds 0xFF000000 dword (little-endian).
|
273
|
+
//! a.embedFloat(0.4f); // Embeds 0.4f (32-bit float, little-endian).
|
274
|
+
//! }
|
275
|
+
//! ```
|
276
|
+
//!
|
277
|
+
//! Sometimes it's required to read data that is embedded after the code. This can be done through
|
278
|
+
//! \ref Label as shown below:
|
279
|
+
//!
|
280
|
+
//! ```
|
281
|
+
//! #include <asmjit/x86.h>
|
282
|
+
//! using namespace asmjit;
|
283
|
+
//!
|
284
|
+
//! void embedData(x86::Assembler& a, const Label& L_Data) {
|
285
|
+
//! x86::Gp addr = a.zax(); // EAX or RAX.
|
286
|
+
//! x86::Gp val = x86::edi; // Where to store some value...
|
287
|
+
//!
|
288
|
+
//! // Approach 1 - Load the address to register through LEA. This approach
|
289
|
+
//! // is flexible as the address can be then manipulated, for
|
290
|
+
//! // example if you have a data array, which would need index.
|
291
|
+
//! a.lea(addr, L_Data); // Loads the address of the label to EAX or RAX.
|
292
|
+
//! a.mov(val, x86::dword_ptr(addr));
|
293
|
+
//!
|
294
|
+
//! // Approach 2 - Load the data directly by using L_Data in address. It's
|
295
|
+
//! // worth noting that this doesn't work with indexes in X64
|
296
|
+
//! // mode. It will use absolute address in 32-bit mode and
|
297
|
+
//! // relative address (RIP) in 64-bit mode.
|
298
|
+
//! a.mov(val, x86::dword_ptr(L_Data));
|
299
|
+
//! }
|
300
|
+
//! ```
|
301
|
+
//!
|
302
|
+
//! ### Label Embedding
|
303
|
+
//!
|
304
|
+
//! It's also possible to embed labels. In general AsmJit provides the following options:
|
305
|
+
//!
|
306
|
+
//! - \ref BaseEmitter::embedLabel() - Embeds absolute address of a label. This is target dependent and would
|
307
|
+
//! embed either 32-bit or 64-bit data that embeds absolute label address. This kind of embedding cannot be
|
308
|
+
//! used in a position independent code.
|
309
|
+
//!
|
310
|
+
//! - \ref BaseEmitter::embedLabelDelta() - Embeds a difference between two labels. The size of the difference
|
311
|
+
//! can be specified so it's possible to embed 8-bit, 16-bit, 32-bit, and 64-bit difference, which is sufficient
|
312
|
+
//! for most purposes.
|
313
|
+
//!
|
314
|
+
//! The following example demonstrates how to embed labels and their differences:
|
315
|
+
//!
|
316
|
+
//! ```
|
317
|
+
//! #include <asmjit/x86.h>
|
318
|
+
//! using namespace asmjit;
|
319
|
+
//!
|
320
|
+
//! void embedLabel(x86::Assembler& a, const Label& L_Data) {
|
321
|
+
//! // [1] Embed L_Data - the size of the data will be dependent on the target.
|
322
|
+
//! a.embedLabel(L_Data);
|
323
|
+
//!
|
324
|
+
//! // [2] Embed a 32-bit difference of two labels.
|
325
|
+
//! Label L_Here = a.newLabel();
|
326
|
+
//! a.bind(L_Here);
|
327
|
+
//! // Embeds int32_t(L_Data - L_Here).
|
328
|
+
//! a.embedLabelDelta(L_Data, L_Here, 4);
|
329
|
+
//! }
|
330
|
+
//! ```
|
331
|
+
//!
|
332
|
+
//! ### Using FuncFrame and FuncDetail with x86::Assembler
|
333
|
+
//!
|
334
|
+
//! The example below demonstrates how \ref FuncFrame and \ref FuncDetail can be used together with \ref x86::Assembler
|
335
|
+
//! to generate a function that will use platform dependent calling conventions automatically depending on the target:
|
336
|
+
//!
|
337
|
+
//! ```
|
338
|
+
//! #include <asmjit/x86.h>
|
339
|
+
//! #include <stdio.h>
|
340
|
+
//!
|
341
|
+
//! using namespace asmjit;
|
342
|
+
//!
|
343
|
+
//! typedef void (*SumIntsFunc)(int* dst, const int* a, const int* b);
|
344
|
+
//!
|
345
|
+
//! int main(int argc, char* argv[]) {
|
346
|
+
//! JitRuntime rt; // Create JIT Runtime.
|
347
|
+
//! CodeHolder code; // Create a CodeHolder.
|
348
|
+
//!
|
349
|
+
//! code.init(rt.environment()); // Initialize code to match the JIT environment.
|
350
|
+
//! x86::Assembler a(&code); // Create and attach x86::Assembler to code.
|
351
|
+
//!
|
352
|
+
//! // Decide which registers will be mapped to function arguments. Try changing
|
353
|
+
//! // registers of dst, src_a, and src_b and see what happens in function's
|
354
|
+
//! // prolog and epilog.
|
355
|
+
//! x86::Gp dst = a.zax();
|
356
|
+
//! x86::Gp src_a = a.zcx();
|
357
|
+
//! x86::Gp src_b = a.zdx();
|
358
|
+
//!
|
359
|
+
//! x86::Xmm vec0 = x86::xmm0;
|
360
|
+
//! x86::Xmm vec1 = x86::xmm1;
|
361
|
+
//!
|
362
|
+
//! // Create/initialize FuncDetail and FuncFrame.
|
363
|
+
//! FuncDetail func;
|
364
|
+
//! func.init(FuncSignatureT<void, int*, const int*, const int*>(CallConvId::kHost));
|
365
|
+
//!
|
366
|
+
//! FuncFrame frame;
|
367
|
+
//! frame.init(func);
|
368
|
+
//!
|
369
|
+
//! // Make XMM0 and XMM1 dirty - RegGroup::kVec describes XMM|YMM|ZMM registers.
|
370
|
+
//! frame.setDirtyRegs(RegGroup::kVec, Support::bitMask(0, 1));
|
371
|
+
//!
|
372
|
+
//! // Alternatively, if you don't want to use register masks you can pass BaseReg
|
373
|
+
//! // to addDirtyRegs(). The following code would add both xmm0 and xmm1.
|
374
|
+
//! frame.addDirtyRegs(x86::xmm0, x86::xmm1);
|
375
|
+
//!
|
376
|
+
//! FuncArgsAssignment args(&func); // Create arguments assignment context.
|
377
|
+
//! args.assignAll(dst, src_a, src_b);// Assign our registers to arguments.
|
378
|
+
//! args.updateFrameInfo(frame); // Reflect our args in FuncFrame.
|
379
|
+
//! frame.finalize(); // Finalize the FuncFrame (updates it).
|
380
|
+
//!
|
381
|
+
//! a.emitProlog(frame); // Emit function prolog.
|
382
|
+
//! a.emitArgsAssignment(frame, args);// Assign arguments to registers.
|
383
|
+
//! a.movdqu(vec0, x86::ptr(src_a)); // Load 4 ints from [src_a] to XMM0.
|
384
|
+
//! a.movdqu(vec1, x86::ptr(src_b)); // Load 4 ints from [src_b] to XMM1.
|
385
|
+
//! a.paddd(vec0, vec1); // Add 4 ints in XMM1 to XMM0.
|
386
|
+
//! a.movdqu(x86::ptr(dst), vec0); // Store the result to [dst].
|
387
|
+
//! a.emitEpilog(frame); // Emit function epilog and return.
|
388
|
+
//!
|
389
|
+
//! SumIntsFunc fn;
|
390
|
+
//! Error err = rt.add(&fn, &code); // Add the generated code to the runtime.
|
391
|
+
//! if (err) return 1; // Handle a possible error case.
|
392
|
+
//!
|
393
|
+
//! // Execute the generated function.
|
394
|
+
//! int inA[4] = { 4, 3, 2, 1 };
|
395
|
+
//! int inB[4] = { 1, 5, 2, 8 };
|
396
|
+
//! int out[4];
|
397
|
+
//! fn(out, inA, inB);
|
398
|
+
//!
|
399
|
+
//! // Prints {5 8 4 9}
|
400
|
+
//! printf("{%d %d %d %d}\n", out[0], out[1], out[2], out[3]);
|
401
|
+
//!
|
402
|
+
//! rt.release(fn);
|
403
|
+
//! return 0;
|
404
|
+
//! }
|
405
|
+
//! ```
|
406
|
+
//!
|
407
|
+
//! ### Using x86::Assembler as Code-Patcher
|
408
|
+
//!
|
409
|
+
//! This is an advanced topic that is sometimes unavoidable. AsmJit by default appends machine code it generates
|
410
|
+
//! into a \ref CodeBuffer, however, it also allows to set the offset in \ref CodeBuffer explicitly and to overwrite
|
411
|
+
//! its content. This technique is extremely dangerous as X86 instructions have variable length (see below), so you
|
412
|
+
//! should in general only patch code to change instruction's immediate values or some other details not known at
|
413
|
+
//! the time the instruction was emitted. A typical scenario that requires code-patching is when you start emitting
|
414
|
+
//! a function and you don't know how much stack you want to reserve for it.
|
415
|
+
//!
|
416
|
+
//! Before we go further it's important to introduce instruction options, because they can help with code-patching
|
417
|
+
//! (and not only patching, but that will be explained in AVX-512 section):
|
418
|
+
//!
|
419
|
+
//! - Many general-purpose instructions (especially arithmetic ones) on X86 have multiple encodings - in AsmJit
|
420
|
+
//! this is usually called 'short form' and 'long form'.
|
421
|
+
//!
|
422
|
+
//! - AsmJit always tries to use 'short form' as it makes the resulting machine-code smaller, which is always
|
423
|
+
//! good - this decision is used by majority of assemblers out there.
|
424
|
+
//!
|
425
|
+
//! - AsmJit allows to override the default decision by using `short_()` and `long_()` instruction options to force
|
426
|
+
//! short or long form, respectively. The most useful is `long_()` as it basically forces AsmJit to always emit
|
427
|
+
//! the longest form. The `short_()` is not that useful as it's automatic (except jumps to non-bound labels). Note
|
428
|
+
//! that the underscore after each function name avoids collision with built-in C++ types.
|
429
|
+
//!
|
430
|
+
//! To illustrate what short form and long form means in binary let's assume we want to emit "add esp, 16" instruction,
|
431
|
+
//! which has two possible binary encodings:
|
432
|
+
//!
|
433
|
+
//! - `83C410` - This is a short form aka `short add esp, 16` - You can see opcode byte (0x83), Mod/RM byte (0xC4)
|
434
|
+
//! and an 8-bit immediate value representing `16`.
|
435
|
+
//!
|
436
|
+
//! - `81C410000000` - This is a long form aka `long add esp, 16` - You can see a different opcode byte (0x81), the
|
437
|
+
//! same Mod/RM byte (0xC4) and a 32-bit immediate in little-endian representing `16`.
|
438
|
+
//!
|
439
|
+
//! It should be obvious that patching an existing instruction into an instruction having a different size may create
|
440
|
+
//! various problems. So it's recommended to be careful and to only patch instructions into instructions having the
|
441
|
+
//! same size. The example below demonstrates how instruction options can be used to guarantee the size of an
|
442
|
+
//! instruction by forcing the assembler to use long-form encoding:
|
443
|
+
//!
|
444
|
+
//! ```
|
445
|
+
//! #include <asmjit/x86.h>
|
446
|
+
//! #include <stdio.h>
|
447
|
+
//!
|
448
|
+
//! using namespace asmjit;
|
449
|
+
//!
|
450
|
+
//! typedef int (*Func)(void);
|
451
|
+
//!
|
452
|
+
//! int main(int argc, char* argv[]) {
|
453
|
+
//! JitRuntime rt; // Create a runtime specialized for JIT.
|
454
|
+
//! CodeHolder code; // Create a CodeHolder.
|
455
|
+
//!
|
456
|
+
//! code.init(rt.environment()); // Initialize code to match the JIT environment.
|
457
|
+
//! x86::Assembler a(&code); // Create and attach x86::Assembler to code.
|
458
|
+
//!
|
459
|
+
//! // Let's get these registers from x86::Assembler.
|
460
|
+
//! x86::Gp zbp = a.zbp();
|
461
|
+
//! x86::Gp zsp = a.zsp();
|
462
|
+
//!
|
463
|
+
//! // Function prolog.
|
464
|
+
//! a.push(zbp);
|
465
|
+
//! a.mov(zbp, zsp);
|
466
|
+
//!
|
467
|
+
//! // This is where we are gonna patch the code later, so let's get the offset
|
468
|
+
//! // (the current location) from the beginning of the code-buffer.
|
469
|
+
//! size_t patchOffset = a.offset();
|
470
|
+
//! // Let's just emit 'sub zsp, 0' for now, but don't forget to use LONG form.
|
471
|
+
//! a.long_().sub(zsp, 0);
|
472
|
+
//!
|
473
|
+
//! // ... emit some code (this just sets return value to zero) ...
|
474
|
+
//! a.xor_(x86::eax, x86::eax);
|
475
|
+
//!
|
476
|
+
//! // Function epilog and return.
|
477
|
+
//! a.mov(zsp, zbp);
|
478
|
+
//! a.pop(zbp);
|
479
|
+
//! a.ret();
|
480
|
+
//!
|
481
|
+
//! // Now we know how much stack size we want to reserve. I have chosen 128
|
482
|
+
//! // bytes on purpose as it's encodable only in long form that we have used.
|
483
|
+
//!
|
484
|
+
//! int stackSize = 128; // Number of bytes to reserve on the stack.
|
485
|
+
//! a.setOffset(patchOffset); // Move the current cursor to `patchOffset`.
|
486
|
+
//! a.long_().sub(zsp, stackSize); // Patch the code; don't forget to use LONG form.
|
487
|
+
//!
|
488
|
+
//! // Now the code is ready to be called
|
489
|
+
//! Func fn;
|
490
|
+
//! Error err = rt.add(&fn, &code); // Add the generated code to the runtime.
|
491
|
+
//! if (err) return 1; // Handle a possible error returned by AsmJit.
|
492
|
+
//!
|
493
|
+
//! int result = fn(); // Execute the generated code.
|
494
|
+
//! printf("%d\n", result); // Print the resulting "0".
|
495
|
+
//!
|
496
|
+
//! rt.release(fn); // Remove the function from the runtime.
|
497
|
+
//! return 0;
|
498
|
+
//! }
|
499
|
+
//! ```
|
500
|
+
//!
|
501
|
+
//! If you run the example it will just work, because both instructions have the same size. As an experiment you can
|
502
|
+
//! try removing `long_()` form to see what happens when wrong code is generated.
|
503
|
+
//!
|
504
|
+
//! ### Code Patching and REX Prefix
|
505
|
+
//!
|
506
|
+
//! In 64-bit mode there is one more thing to worry about when patching code: REX prefix. It's a single byte prefix
|
507
|
+
//! designed to address registers with ids from 8 to 15 and to override the default width of operation from 32 to 64
|
508
|
+
//! bits. AsmJit, like other assemblers, only emits REX prefix when it's necessary. If the patched code only changes
|
509
|
+
//! the immediate value as shown in the previous example then there is nothing to worry about as it doesn't change
|
510
|
+
//! the logic behind emitting REX prefix, however, if the patched code changes register id or overrides the operation
|
511
|
+
//! width then it's important to take care of REX prefix as well.
|
512
|
+
//!
|
513
|
+
//! AsmJit contains another instruction option that controls (forces) REX prefix - `rex()`. If you use it the
|
514
|
+
//! instruction emitted will always use REX prefix even when it's encodable without it. The following list contains
|
515
|
+
//! some instructions and their binary representations to illustrate when it's emitted:
|
516
|
+
//!
|
517
|
+
//! - `__83C410` - `add esp, 16` - 32-bit operation in 64-bit mode doesn't require REX prefix.
|
518
|
+
//! - `4083C410` - `rex add esp, 16` - 32-bit operation in 64-bit mode with forced REX prefix (0x40).
|
519
|
+
//! - `4883C410` - `add rsp, 16` - 64-bit operation in 64-bit mode requires REX prefix (0x48).
|
520
|
+
//! - `4183C410` - `add r12d, 16` - 32-bit operation in 64-bit mode using R12D requires REX prefix (0x41).
|
521
|
+
//! - `4983C410` - `add r12, 16` - 64-bit operation in 64-bit mode using R12 requires REX prefix (0x49).
|
522
|
+
//!
|
523
|
+
//! ### More Prefixes
|
524
|
+
//!
|
525
|
+
//! X86 architecture is known for its prefixes. AsmJit supports all prefixes
|
526
|
+
//! that can affect how the instruction is encoded:
|
527
|
+
//!
|
528
|
+
//! ```
|
529
|
+
//! #include <asmjit/x86.h>
|
530
|
+
//!
|
531
|
+
//! using namespace asmjit;
|
532
|
+
//!
|
533
|
+
//! void prefixesExample(x86::Assembler& a) {
|
534
|
+
//! // Lock prefix for implementing atomics:
|
535
|
+
//! // lock add dword ptr [dst], 1
|
536
|
+
//! a.lock().add(x86::dword_ptr(dst), 1);
|
537
|
+
//!
|
538
|
+
//! // Similarly, XAcquire/XRelease prefixes are also available:
|
539
|
+
//! // xacquire add dword ptr [dst], 1
|
540
|
+
//! a.xacquire().add(x86::dword_ptr(dst), 1);
|
541
|
+
//!
|
542
|
+
//! // Rep prefix (see also repe/repz and repne/repnz):
|
543
|
+
//! // rep movs byte ptr [dst], byte ptr [src]
|
544
|
+
//! a.rep().movs(x86::byte_ptr(dst), x86::byte_ptr(src));
|
545
|
+
//!
|
546
|
+
//! // Forcing REX prefix in 64-bit mode.
|
547
|
+
//! // rex mov eax, 1
|
548
|
+
//! a.rex().mov(x86::eax, 1);
|
549
|
+
//!
|
550
|
+
//! // AVX instruction without forced prefix uses the shortest encoding:
|
551
|
+
//! // vaddpd xmm0, xmm1, xmm2 -> [C5|F1|58|C2]
|
552
|
+
//! a.vaddpd(x86::xmm0, x86::xmm1, x86::xmm2);
|
553
|
+
//!
|
554
|
+
//! // Forcing VEX3 prefix (AVX):
|
555
|
+
//! // vex3 vaddpd xmm0, xmm1, xmm2 -> [C4|E1|71|58|C2]
|
556
|
+
//! a.vex3().vaddpd(x86::xmm0, x86::xmm1, x86::xmm2);
|
557
|
+
//!
|
558
|
+
//! // Forcing EVEX prefix (AVX512):
|
559
|
+
//! // evex vaddpd xmm0, xmm1, xmm2 -> [62|F1|F5|08|58|C2]
|
560
|
+
//! a.evex().vaddpd(x86::xmm0, x86::xmm1, x86::xmm2);
|
561
|
+
//!
|
562
|
+
//! // Some instructions accept prefixes that were not originally intended for them:
|
563
|
+
//! // rep ret
|
564
|
+
//! a.rep().ret();
|
565
|
+
//! }
|
566
|
+
//! ```
|
567
|
+
//!
|
568
|
+
//! It's important to understand that prefixes are part of instruction options. When a member function that involves
|
569
|
+
//! adding a prefix is called the prefix is combined with existing instruction options, which will affect the next
|
570
|
+
//! instruction generated.
|
571
|
+
//!
|
572
|
+
//! ### Generating AVX512 Code
|
573
|
+
//!
|
574
|
+
//! x86::Assembler can generate AVX512+ code including the use of opmask registers. Opmask can be specified through
|
575
|
+
//! \ref x86::Assembler::k() function, which stores it as an extra register, which will be used by the next
|
576
|
+
//! instruction. AsmJit uses such concept for manipulating instruction options as well.
|
577
|
+
//!
|
578
|
+
//! The following AVX512 features are supported:
|
579
|
+
//!
|
580
|
+
//! - Opmask selector {k} and zeroing {z}.
|
581
|
+
//! - Rounding modes {rn|rd|ru|rz} and suppress-all-exceptions {sae} option.
|
582
|
+
//! - AVX512 broadcasts {1toN}.
|
583
|
+
//!
|
584
|
+
//! The following example demonstrates how AVX512 features can be used:
|
585
|
+
//!
|
586
|
+
//! ```
|
587
|
+
//! #include <asmjit/x86.h>
|
588
|
+
//!
|
589
|
+
//! using namespace asmjit;
|
590
|
+
//!
|
591
|
+
//! void generateAVX512Code(x86::Assembler& a) {
|
592
|
+
//! using namespace x86;
|
593
|
+
//!
|
594
|
+
//! // Opmask Selectors
|
595
|
+
//! // ----------------
|
596
|
+
//! //
|
597
|
+
//! // - Opmask / zeroing is part of the instruction options / extraReg.
|
598
|
+
//! // - k(reg) is like {kreg} in Intel syntax.
|
599
|
+
//! // - z() is like {z} in Intel syntax.
|
600
|
+
//!
|
601
|
+
//! // vaddpd zmm0 {k1} {z}, zmm1, zmm2
|
602
|
+
//! a.k(k1).z().vaddpd(zmm0, zmm1, zmm2);
|
603
|
+
//!
|
604
|
+
//! // Memory Broadcasts
|
605
|
+
//! // -----------------
|
606
|
+
//! //
|
607
|
+
//! // - Broadcast data is part of memory operand.
|
608
|
+
//! // - Use x86::Mem::_1toN(), which returns a new x86::Mem operand.
|
609
|
+
//!
|
610
|
+
//! // vaddpd zmm0 {k1} {z}, zmm1, [rcx] {1to8}
|
611
|
+
//! a.k(k1).z().vaddpd(zmm0, zmm1, x86::ptr(rcx)._1to8());
|
612
|
+
//!
|
613
|
+
//! // Embedded Rounding & Suppress-All-Exceptions
|
614
|
+
//! // -------------------------------------------
|
615
|
+
//! //
|
616
|
+
//! // - Rounding mode and {sae} are part of instruction options.
|
617
|
+
//! // - Use sae() to enable exception suppression.
|
618
|
+
//! // - Use rn_sae(), rd_sae(), ru_sae(), and rz_sae() - to enable rounding.
|
619
|
+
//! // - Embedded rounding implicitly sets {sae} as well, that's why the API
|
620
|
+
//! // also has the _sae() suffix, to make it clear.
|
621
|
+
//!
|
622
|
+
//! // vcmppd k1, zmm1, zmm2, 0x00 {sae}
|
623
|
+
//! a.sae().vcmppd(k1, zmm1, zmm2, 0);
|
624
|
+
//!
|
625
|
+
//! // vaddpd zmm0, zmm1, zmm2 {rz}
|
626
|
+
//! a.rz_sae().vaddpd(zmm0, zmm1, zmm2);
|
627
|
+
//! }
|
628
|
+
//! ```
|
629
|
+
class ASMJIT_VIRTAPI Assembler
|
630
|
+
: public BaseAssembler,
|
631
|
+
public EmitterImplicitT<Assembler> {
|
632
|
+
public:
|
633
|
+
// NOTE(review): presumably disables copy construction / copy assignment - confirm against the macro definition.
ASMJIT_NONCOPYABLE(Assembler)
|
634
|
+
//! Alias of the base class (`BaseAssembler`).
typedef BaseAssembler Base;
|
635
|
+
|
636
|
+
//! \name Construction & Destruction
|
637
|
+
//! \{
|
638
|
+
|
639
|
+
//! Creates a new `x86::Assembler`. `code` is optional (defaults to `nullptr`); the usage examples in the class
//! documentation pass a `CodeHolder` to create the assembler and attach it to that code in one step.
ASMJIT_API explicit Assembler(CodeHolder* code = nullptr) noexcept;
|
640
|
+
//! Destroys the `x86::Assembler` instance.
ASMJIT_API virtual ~Assembler() noexcept;
|
641
|
+
|
642
|
+
//! \}
|
643
|
+
|
644
|
+
//! \cond INTERNAL
|
645
|
+
//! \name Internal
|
646
|
+
//! \{
|
647
|
+
|
648
|
+
// NOTE: x86::Assembler uses _privateData to store 'address-override' bit that is used to decide whether to emit
|
649
|
+
// address-override (67H) prefix based on the memory BASE+INDEX registers. It's either `kX86MemInfo_67H_X86` or
|
650
|
+
// `kX86MemInfo_67H_X64`.
|
651
|
+
//! Returns the address-override mask currently stored in `_privateData`.
inline uint32_t _addressOverrideMask() const noexcept { return _privateData; }
|
652
|
+
//! Stores `m` in `_privateData` as the new address-override mask.
inline void _setAddressOverrideMask(uint32_t m) noexcept { _privateData = m; }
|
653
|
+
|
654
|
+
//! \}
|
655
|
+
//! \endcond
|
656
|
+
|
657
|
+
//! \name Emit
|
658
|
+
//! \{
|
659
|
+
|
660
|
+
//! Overrides the base-class `_emit()`: takes the instruction id, the first three operands, and a pointer to
//! additional operands (`opExt`), and returns an `Error` code.
// NOTE(review): presumably the low-level entry point that encodes a single instruction - confirm in the .cpp file.
ASMJIT_API Error _emit(InstId instId, const Operand_& o0, const Operand_& o1, const Operand_& o2, const Operand_* opExt) override;
|
661
|
+
|
662
|
+
//! \}
|
663
|
+
// NOTE(review): the Doxygen end-of-conditional command below has no matching opening command within this class
// (the INTERNAL section above is already closed) - confirm whether the Emit group was meant to be conditional too.
//! \endcond
|
664
|
+
|
665
|
+
//! \name Align
|
666
|
+
//! \{
|
667
|
+
|
668
|
+
//! Overrides the base-class `align()`; takes an alignment mode and an alignment value and returns an `Error` code.
// NOTE(review): units and valid range of `alignment` are not visible here - confirm against the base class docs.
ASMJIT_API Error align(AlignMode alignMode, uint32_t alignment) override;
|
669
|
+
|
670
|
+
//! \}
|
671
|
+
|
672
|
+
//! \name Events
|
673
|
+
//! \{
|
674
|
+
|
675
|
+
//! Overrides the base-class `onAttach()` event handler.
// NOTE(review): presumably invoked when this assembler is attached to `code` - confirm against the base class docs.
ASMJIT_API Error onAttach(CodeHolder* code) noexcept override;
|
676
|
+
//! Overrides the base-class `onDetach()` event handler.
// NOTE(review): presumably invoked when this assembler is detached from `code` - confirm against the base class docs.
ASMJIT_API Error onDetach(CodeHolder* code) noexcept override;
|
677
|
+
|
678
|
+
//! \}
|
679
|
+
};
|
680
|
+
|
681
|
+
//! \}
|
682
|
+
|
683
|
+
ASMJIT_END_SUB_NAMESPACE
|
684
|
+
|
685
|
+
#endif // ASMJIT_X86_X86ASSEMBLER_H_INCLUDED
|