asmjit 0.2.0 → 0.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile.lock +1 -1
- data/asmjit.gemspec +1 -1
- data/ext/asmjit/asmjit/.editorconfig +10 -0
- data/ext/asmjit/asmjit/.github/FUNDING.yml +1 -0
- data/ext/asmjit/asmjit/.github/workflows/build-config.json +47 -0
- data/ext/asmjit/asmjit/.github/workflows/build.yml +156 -0
- data/ext/asmjit/asmjit/.gitignore +6 -0
- data/ext/asmjit/asmjit/CMakeLists.txt +611 -0
- data/ext/asmjit/asmjit/LICENSE.md +17 -0
- data/ext/asmjit/asmjit/README.md +69 -0
- data/ext/asmjit/asmjit/src/asmjit/a64.h +62 -0
- data/ext/asmjit/asmjit/src/asmjit/arm/a64archtraits_p.h +81 -0
- data/ext/asmjit/asmjit/src/asmjit/arm/a64assembler.cpp +5115 -0
- data/ext/asmjit/asmjit/src/asmjit/arm/a64assembler.h +72 -0
- data/ext/asmjit/asmjit/src/asmjit/arm/a64builder.cpp +51 -0
- data/ext/asmjit/asmjit/src/asmjit/arm/a64builder.h +57 -0
- data/ext/asmjit/asmjit/src/asmjit/arm/a64compiler.cpp +60 -0
- data/ext/asmjit/asmjit/src/asmjit/arm/a64compiler.h +247 -0
- data/ext/asmjit/asmjit/src/asmjit/arm/a64emithelper.cpp +464 -0
- data/ext/asmjit/asmjit/src/asmjit/arm/a64emithelper_p.h +50 -0
- data/ext/asmjit/asmjit/src/asmjit/arm/a64emitter.h +1228 -0
- data/ext/asmjit/asmjit/src/asmjit/arm/a64formatter.cpp +298 -0
- data/ext/asmjit/asmjit/src/asmjit/arm/a64formatter_p.h +59 -0
- data/ext/asmjit/asmjit/src/asmjit/arm/a64func.cpp +189 -0
- data/ext/asmjit/asmjit/src/asmjit/arm/a64func_p.h +33 -0
- data/ext/asmjit/asmjit/src/asmjit/arm/a64globals.h +1894 -0
- data/ext/asmjit/asmjit/src/asmjit/arm/a64instapi.cpp +278 -0
- data/ext/asmjit/asmjit/src/asmjit/arm/a64instapi_p.h +41 -0
- data/ext/asmjit/asmjit/src/asmjit/arm/a64instdb.cpp +1957 -0
- data/ext/asmjit/asmjit/src/asmjit/arm/a64instdb.h +74 -0
- data/ext/asmjit/asmjit/src/asmjit/arm/a64instdb_p.h +876 -0
- data/ext/asmjit/asmjit/src/asmjit/arm/a64operand.cpp +85 -0
- data/ext/asmjit/asmjit/src/asmjit/arm/a64operand.h +312 -0
- data/ext/asmjit/asmjit/src/asmjit/arm/a64rapass.cpp +852 -0
- data/ext/asmjit/asmjit/src/asmjit/arm/a64rapass_p.h +105 -0
- data/ext/asmjit/asmjit/src/asmjit/arm/a64utils.h +179 -0
- data/ext/asmjit/asmjit/src/asmjit/arm/armformatter.cpp +143 -0
- data/ext/asmjit/asmjit/src/asmjit/arm/armformatter_p.h +44 -0
- data/ext/asmjit/asmjit/src/asmjit/arm/armglobals.h +21 -0
- data/ext/asmjit/asmjit/src/asmjit/arm/armoperand.h +621 -0
- data/ext/asmjit/asmjit/src/asmjit/arm.h +62 -0
- data/ext/asmjit/asmjit/src/asmjit/asmjit-scope-begin.h +17 -0
- data/ext/asmjit/asmjit/src/asmjit/asmjit-scope-end.h +9 -0
- data/ext/asmjit/asmjit/src/asmjit/asmjit.h +33 -0
- data/ext/asmjit/asmjit/src/asmjit/core/api-build_p.h +55 -0
- data/ext/asmjit/asmjit/src/asmjit/core/api-config.h +613 -0
- data/ext/asmjit/asmjit/src/asmjit/core/archcommons.h +229 -0
- data/ext/asmjit/asmjit/src/asmjit/core/archtraits.cpp +160 -0
- data/ext/asmjit/asmjit/src/asmjit/core/archtraits.h +290 -0
- data/ext/asmjit/asmjit/src/asmjit/core/assembler.cpp +406 -0
- data/ext/asmjit/asmjit/src/asmjit/core/assembler.h +129 -0
- data/ext/asmjit/asmjit/src/asmjit/core/builder.cpp +889 -0
- data/ext/asmjit/asmjit/src/asmjit/core/builder.h +1391 -0
- data/ext/asmjit/asmjit/src/asmjit/core/codebuffer.h +113 -0
- data/ext/asmjit/asmjit/src/asmjit/core/codeholder.cpp +1149 -0
- data/ext/asmjit/asmjit/src/asmjit/core/codeholder.h +1035 -0
- data/ext/asmjit/asmjit/src/asmjit/core/codewriter.cpp +175 -0
- data/ext/asmjit/asmjit/src/asmjit/core/codewriter_p.h +179 -0
- data/ext/asmjit/asmjit/src/asmjit/core/compiler.cpp +582 -0
- data/ext/asmjit/asmjit/src/asmjit/core/compiler.h +737 -0
- data/ext/asmjit/asmjit/src/asmjit/core/compilerdefs.h +173 -0
- data/ext/asmjit/asmjit/src/asmjit/core/constpool.cpp +363 -0
- data/ext/asmjit/asmjit/src/asmjit/core/constpool.h +250 -0
- data/ext/asmjit/asmjit/src/asmjit/core/cpuinfo.cpp +1162 -0
- data/ext/asmjit/asmjit/src/asmjit/core/cpuinfo.h +813 -0
- data/ext/asmjit/asmjit/src/asmjit/core/emithelper.cpp +323 -0
- data/ext/asmjit/asmjit/src/asmjit/core/emithelper_p.h +58 -0
- data/ext/asmjit/asmjit/src/asmjit/core/emitter.cpp +333 -0
- data/ext/asmjit/asmjit/src/asmjit/core/emitter.h +741 -0
- data/ext/asmjit/asmjit/src/asmjit/core/emitterutils.cpp +129 -0
- data/ext/asmjit/asmjit/src/asmjit/core/emitterutils_p.h +89 -0
- data/ext/asmjit/asmjit/src/asmjit/core/environment.cpp +46 -0
- data/ext/asmjit/asmjit/src/asmjit/core/environment.h +508 -0
- data/ext/asmjit/asmjit/src/asmjit/core/errorhandler.cpp +14 -0
- data/ext/asmjit/asmjit/src/asmjit/core/errorhandler.h +228 -0
- data/ext/asmjit/asmjit/src/asmjit/core/formatter.cpp +584 -0
- data/ext/asmjit/asmjit/src/asmjit/core/formatter.h +247 -0
- data/ext/asmjit/asmjit/src/asmjit/core/formatter_p.h +34 -0
- data/ext/asmjit/asmjit/src/asmjit/core/func.cpp +286 -0
- data/ext/asmjit/asmjit/src/asmjit/core/func.h +1445 -0
- data/ext/asmjit/asmjit/src/asmjit/core/funcargscontext.cpp +293 -0
- data/ext/asmjit/asmjit/src/asmjit/core/funcargscontext_p.h +199 -0
- data/ext/asmjit/asmjit/src/asmjit/core/globals.cpp +133 -0
- data/ext/asmjit/asmjit/src/asmjit/core/globals.h +393 -0
- data/ext/asmjit/asmjit/src/asmjit/core/inst.cpp +113 -0
- data/ext/asmjit/asmjit/src/asmjit/core/inst.h +772 -0
- data/ext/asmjit/asmjit/src/asmjit/core/jitallocator.cpp +1242 -0
- data/ext/asmjit/asmjit/src/asmjit/core/jitallocator.h +261 -0
- data/ext/asmjit/asmjit/src/asmjit/core/jitruntime.cpp +80 -0
- data/ext/asmjit/asmjit/src/asmjit/core/jitruntime.h +89 -0
- data/ext/asmjit/asmjit/src/asmjit/core/logger.cpp +69 -0
- data/ext/asmjit/asmjit/src/asmjit/core/logger.h +198 -0
- data/ext/asmjit/asmjit/src/asmjit/core/misc_p.h +33 -0
- data/ext/asmjit/asmjit/src/asmjit/core/operand.cpp +132 -0
- data/ext/asmjit/asmjit/src/asmjit/core/operand.h +1611 -0
- data/ext/asmjit/asmjit/src/asmjit/core/osutils.cpp +84 -0
- data/ext/asmjit/asmjit/src/asmjit/core/osutils.h +61 -0
- data/ext/asmjit/asmjit/src/asmjit/core/osutils_p.h +68 -0
- data/ext/asmjit/asmjit/src/asmjit/core/raassignment_p.h +418 -0
- data/ext/asmjit/asmjit/src/asmjit/core/rabuilders_p.h +612 -0
- data/ext/asmjit/asmjit/src/asmjit/core/radefs_p.h +1204 -0
- data/ext/asmjit/asmjit/src/asmjit/core/ralocal.cpp +1166 -0
- data/ext/asmjit/asmjit/src/asmjit/core/ralocal_p.h +254 -0
- data/ext/asmjit/asmjit/src/asmjit/core/rapass.cpp +1969 -0
- data/ext/asmjit/asmjit/src/asmjit/core/rapass_p.h +1183 -0
- data/ext/asmjit/asmjit/src/asmjit/core/rastack.cpp +184 -0
- data/ext/asmjit/asmjit/src/asmjit/core/rastack_p.h +171 -0
- data/ext/asmjit/asmjit/src/asmjit/core/string.cpp +559 -0
- data/ext/asmjit/asmjit/src/asmjit/core/string.h +372 -0
- data/ext/asmjit/asmjit/src/asmjit/core/support.cpp +494 -0
- data/ext/asmjit/asmjit/src/asmjit/core/support.h +1773 -0
- data/ext/asmjit/asmjit/src/asmjit/core/target.cpp +14 -0
- data/ext/asmjit/asmjit/src/asmjit/core/target.h +53 -0
- data/ext/asmjit/asmjit/src/asmjit/core/type.cpp +74 -0
- data/ext/asmjit/asmjit/src/asmjit/core/type.h +419 -0
- data/ext/asmjit/asmjit/src/asmjit/core/virtmem.cpp +722 -0
- data/ext/asmjit/asmjit/src/asmjit/core/virtmem.h +242 -0
- data/ext/asmjit/asmjit/src/asmjit/core/zone.cpp +353 -0
- data/ext/asmjit/asmjit/src/asmjit/core/zone.h +615 -0
- data/ext/asmjit/asmjit/src/asmjit/core/zonehash.cpp +309 -0
- data/ext/asmjit/asmjit/src/asmjit/core/zonehash.h +186 -0
- data/ext/asmjit/asmjit/src/asmjit/core/zonelist.cpp +163 -0
- data/ext/asmjit/asmjit/src/asmjit/core/zonelist.h +209 -0
- data/ext/asmjit/asmjit/src/asmjit/core/zonestack.cpp +176 -0
- data/ext/asmjit/asmjit/src/asmjit/core/zonestack.h +239 -0
- data/ext/asmjit/asmjit/src/asmjit/core/zonestring.h +120 -0
- data/ext/asmjit/asmjit/src/asmjit/core/zonetree.cpp +99 -0
- data/ext/asmjit/asmjit/src/asmjit/core/zonetree.h +380 -0
- data/ext/asmjit/asmjit/src/asmjit/core/zonevector.cpp +356 -0
- data/ext/asmjit/asmjit/src/asmjit/core/zonevector.h +690 -0
- data/ext/asmjit/asmjit/src/asmjit/core.h +1861 -0
- data/ext/asmjit/asmjit/src/asmjit/x86/x86archtraits_p.h +148 -0
- data/ext/asmjit/asmjit/src/asmjit/x86/x86assembler.cpp +5110 -0
- data/ext/asmjit/asmjit/src/asmjit/x86/x86assembler.h +685 -0
- data/ext/asmjit/asmjit/src/asmjit/x86/x86builder.cpp +52 -0
- data/ext/asmjit/asmjit/src/asmjit/x86/x86builder.h +351 -0
- data/ext/asmjit/asmjit/src/asmjit/x86/x86compiler.cpp +61 -0
- data/ext/asmjit/asmjit/src/asmjit/x86/x86compiler.h +721 -0
- data/ext/asmjit/asmjit/src/asmjit/x86/x86emithelper.cpp +619 -0
- data/ext/asmjit/asmjit/src/asmjit/x86/x86emithelper_p.h +60 -0
- data/ext/asmjit/asmjit/src/asmjit/x86/x86emitter.h +4315 -0
- data/ext/asmjit/asmjit/src/asmjit/x86/x86formatter.cpp +944 -0
- data/ext/asmjit/asmjit/src/asmjit/x86/x86formatter_p.h +58 -0
- data/ext/asmjit/asmjit/src/asmjit/x86/x86func.cpp +503 -0
- data/ext/asmjit/asmjit/src/asmjit/x86/x86func_p.h +33 -0
- data/ext/asmjit/asmjit/src/asmjit/x86/x86globals.h +2169 -0
- data/ext/asmjit/asmjit/src/asmjit/x86/x86instapi.cpp +1732 -0
- data/ext/asmjit/asmjit/src/asmjit/x86/x86instapi_p.h +41 -0
- data/ext/asmjit/asmjit/src/asmjit/x86/x86instdb.cpp +4427 -0
- data/ext/asmjit/asmjit/src/asmjit/x86/x86instdb.h +563 -0
- data/ext/asmjit/asmjit/src/asmjit/x86/x86instdb_p.h +311 -0
- data/ext/asmjit/asmjit/src/asmjit/x86/x86opcode_p.h +436 -0
- data/ext/asmjit/asmjit/src/asmjit/x86/x86operand.cpp +231 -0
- data/ext/asmjit/asmjit/src/asmjit/x86/x86operand.h +1085 -0
- data/ext/asmjit/asmjit/src/asmjit/x86/x86rapass.cpp +1509 -0
- data/ext/asmjit/asmjit/src/asmjit/x86/x86rapass_p.h +94 -0
- data/ext/asmjit/asmjit/src/asmjit/x86.h +93 -0
- data/ext/asmjit/asmjit/src/asmjit.natvis +245 -0
- data/ext/asmjit/asmjit/test/asmjit_test_assembler.cpp +84 -0
- data/ext/asmjit/asmjit/test/asmjit_test_assembler.h +85 -0
- data/ext/asmjit/asmjit/test/asmjit_test_assembler_a64.cpp +4006 -0
- data/ext/asmjit/asmjit/test/asmjit_test_assembler_x64.cpp +17833 -0
- data/ext/asmjit/asmjit/test/asmjit_test_assembler_x86.cpp +8300 -0
- data/ext/asmjit/asmjit/test/asmjit_test_compiler.cpp +253 -0
- data/ext/asmjit/asmjit/test/asmjit_test_compiler.h +73 -0
- data/ext/asmjit/asmjit/test/asmjit_test_compiler_a64.cpp +690 -0
- data/ext/asmjit/asmjit/test/asmjit_test_compiler_x86.cpp +4317 -0
- data/ext/asmjit/asmjit/test/asmjit_test_emitters.cpp +197 -0
- data/ext/asmjit/asmjit/test/asmjit_test_instinfo.cpp +181 -0
- data/ext/asmjit/asmjit/test/asmjit_test_misc.h +257 -0
- data/ext/asmjit/asmjit/test/asmjit_test_perf.cpp +62 -0
- data/ext/asmjit/asmjit/test/asmjit_test_perf.h +61 -0
- data/ext/asmjit/asmjit/test/asmjit_test_perf_a64.cpp +699 -0
- data/ext/asmjit/asmjit/test/asmjit_test_perf_x86.cpp +5032 -0
- data/ext/asmjit/asmjit/test/asmjit_test_unit.cpp +172 -0
- data/ext/asmjit/asmjit/test/asmjit_test_x86_sections.cpp +172 -0
- data/ext/asmjit/asmjit/test/asmjitutils.h +38 -0
- data/ext/asmjit/asmjit/test/broken.cpp +312 -0
- data/ext/asmjit/asmjit/test/broken.h +148 -0
- data/ext/asmjit/asmjit/test/cmdline.h +61 -0
- data/ext/asmjit/asmjit/test/performancetimer.h +41 -0
- data/ext/asmjit/asmjit/tools/configure-makefiles.sh +13 -0
- data/ext/asmjit/asmjit/tools/configure-ninja.sh +13 -0
- data/ext/asmjit/asmjit/tools/configure-sanitizers.sh +13 -0
- data/ext/asmjit/asmjit/tools/configure-vs2019-x64.bat +2 -0
- data/ext/asmjit/asmjit/tools/configure-vs2019-x86.bat +2 -0
- data/ext/asmjit/asmjit/tools/configure-vs2022-x64.bat +2 -0
- data/ext/asmjit/asmjit/tools/configure-vs2022-x86.bat +2 -0
- data/ext/asmjit/asmjit/tools/configure-xcode.sh +8 -0
- data/ext/asmjit/asmjit/tools/enumgen.js +417 -0
- data/ext/asmjit/asmjit/tools/enumgen.sh +3 -0
- data/ext/asmjit/asmjit/tools/tablegen-arm.js +365 -0
- data/ext/asmjit/asmjit/tools/tablegen-arm.sh +3 -0
- data/ext/asmjit/asmjit/tools/tablegen-x86.js +2638 -0
- data/ext/asmjit/asmjit/tools/tablegen-x86.sh +3 -0
- data/ext/asmjit/asmjit/tools/tablegen.js +947 -0
- data/ext/asmjit/asmjit/tools/tablegen.sh +4 -0
- data/ext/asmjit/asmjit.cc +18 -0
- data/lib/asmjit/version.rb +1 -1
- metadata +197 -2
|
@@ -0,0 +1,197 @@
|
|
|
1
|
+
// This file is part of AsmJit project <https://asmjit.com>
|
|
2
|
+
//
|
|
3
|
+
// See asmjit.h or LICENSE.md for license and copyright information
|
|
4
|
+
// SPDX-License-Identifier: Zlib
|
|
5
|
+
|
|
6
|
+
#include <asmjit/core.h>
|
|
7
|
+
|
|
8
|
+
#if !defined(ASMJIT_NO_X86) && ASMJIT_ARCH_X86
|
|
9
|
+
#include <asmjit/x86.h>
|
|
10
|
+
|
|
11
|
+
#include <stdio.h>
|
|
12
|
+
#include <stdlib.h>
|
|
13
|
+
#include <string.h>
|
|
14
|
+
|
|
15
|
+
using namespace asmjit;
|
|
16
|
+
|
|
17
|
+
// Signature of the generated function: receives a destination pointer and two
// source pointers, and returns nothing.
using SumIntsFunc = void (*)(int* dst, const int* a, const int* b);
|
|
19
|
+
|
|
20
|
+
// This function works with both x86::Assembler and x86::Builder. It shows how
|
|
21
|
+
// `x86::Emitter` can be used to make your code more generic.
|
|
22
|
+
static void makeRawFunc(x86::Emitter* emitter) noexcept {
|
|
23
|
+
// Decide which registers will be mapped to function arguments. Try changing
|
|
24
|
+
// registers of `dst`, `src_a`, and `src_b` and see what happens in function's
|
|
25
|
+
// prolog and epilog.
|
|
26
|
+
x86::Gp dst = emitter->zax();
|
|
27
|
+
x86::Gp src_a = emitter->zcx();
|
|
28
|
+
x86::Gp src_b = emitter->zdx();
|
|
29
|
+
|
|
30
|
+
// Decide which vector registers to use. We use these to keep the code generic,
|
|
31
|
+
// you can switch to any other registers when needed.
|
|
32
|
+
x86::Xmm vec0 = x86::xmm0;
|
|
33
|
+
x86::Xmm vec1 = x86::xmm1;
|
|
34
|
+
|
|
35
|
+
// Create and initialize `FuncDetail` and `FuncFrame`.
|
|
36
|
+
FuncDetail func;
|
|
37
|
+
func.init(FuncSignatureT<void, int*, const int*, const int*>(CallConvId::kHost), emitter->environment());
|
|
38
|
+
|
|
39
|
+
FuncFrame frame;
|
|
40
|
+
frame.init(func);
|
|
41
|
+
|
|
42
|
+
// Make XMM0 and XMM1 dirty. VEC group includes XMM|YMM|ZMM registers.
|
|
43
|
+
frame.addDirtyRegs(x86::xmm0, x86::xmm1);
|
|
44
|
+
|
|
45
|
+
FuncArgsAssignment args(&func); // Create arguments assignment context.
|
|
46
|
+
args.assignAll(dst, src_a, src_b); // Assign our registers to arguments.
|
|
47
|
+
args.updateFuncFrame(frame); // Reflect our args in FuncFrame.
|
|
48
|
+
frame.finalize();
|
|
49
|
+
|
|
50
|
+
// Emit prolog and allocate arguments to registers.
|
|
51
|
+
emitter->emitProlog(frame);
|
|
52
|
+
emitter->emitArgsAssignment(frame, args);
|
|
53
|
+
|
|
54
|
+
emitter->movdqu(vec0, x86::ptr(src_a)); // Load 4 ints from [src_a] to XMM0.
|
|
55
|
+
emitter->movdqu(vec1, x86::ptr(src_b)); // Load 4 ints from [src_b] to XMM1.
|
|
56
|
+
|
|
57
|
+
emitter->paddd(vec0, vec1); // Add 4 ints in XMM1 to XMM0.
|
|
58
|
+
emitter->movdqu(x86::ptr(dst), vec0); // Store the result to [dst].
|
|
59
|
+
|
|
60
|
+
// Emit epilog and return.
|
|
61
|
+
emitter->emitEpilog(frame);
|
|
62
|
+
}
|
|
63
|
+
|
|
64
|
+
#ifndef ASMJIT_NO_COMPILER
|
|
65
|
+
// This function works with x86::Compiler, provided for comparison.
|
|
66
|
+
static void makeCompiledFunc(x86::Compiler* cc) noexcept {
|
|
67
|
+
x86::Gp dst = cc->newIntPtr("dst");
|
|
68
|
+
x86::Gp src_a = cc->newIntPtr("src_a");
|
|
69
|
+
x86::Gp src_b = cc->newIntPtr("src_b");
|
|
70
|
+
x86::Xmm vec0 = cc->newXmm("vec0");
|
|
71
|
+
x86::Xmm vec1 = cc->newXmm("vec1");
|
|
72
|
+
|
|
73
|
+
FuncNode* funcNode = cc->addFunc(FuncSignatureT<void, int*, const int*, const int*>(CallConvId::kHost));
|
|
74
|
+
funcNode->setArg(0, dst);
|
|
75
|
+
funcNode->setArg(1, src_a);
|
|
76
|
+
funcNode->setArg(2, src_b);
|
|
77
|
+
|
|
78
|
+
cc->movdqu(vec0, x86::ptr(src_a));
|
|
79
|
+
cc->movdqu(vec1, x86::ptr(src_b));
|
|
80
|
+
cc->paddd(vec0, vec1);
|
|
81
|
+
cc->movdqu(x86::ptr(dst), vec0);
|
|
82
|
+
cc->endFunc();
|
|
83
|
+
}
|
|
84
|
+
#endif
|
|
85
|
+
|
|
86
|
+
// Generates the sum function with the emitter selected by `emitterType`, adds
// it to `rt`, executes it on a fixed input and verifies the output.
//
// Returns 0 when the generated function produced { 5 8 4 9 }, and 1 when code
// generation, finalization, `JitRuntime::add()` or the result check failed.
static uint32_t testFunc(JitRuntime& rt, EmitterType emitterType) noexcept {
#ifndef ASMJIT_NO_LOGGING
  FileLogger logger(stdout);
  logger.setIndentation(FormatIndentationGroup::kCode, 2);
#endif

  CodeHolder code;
  Error err = code.init(rt.environment());
  if (err) {
    printf("** FAILURE: CodeHolder::init() failed (%s) **\n", DebugUtils::errorAsString(err));
    return 1;
  }

#ifndef ASMJIT_NO_LOGGING
  code.setLogger(&logger);
#endif

  switch (emitterType) {
    case EmitterType::kNone: {
      // Nothing is emitted; the code is still handed to the runtime below.
      break;
    }

    case EmitterType::kAssembler: {
      printf("Using x86::Assembler:\n");
      x86::Assembler a(&code);
      makeRawFunc(a.as<x86::Emitter>());
      break;
    }

#ifndef ASMJIT_NO_BUILDER
    case EmitterType::kBuilder: {
      printf("Using x86::Builder:\n");
      x86::Builder cb(&code);
      makeRawFunc(cb.as<x86::Emitter>());

      err = cb.finalize();
      if (err) {
        printf("** FAILURE: x86::Builder::finalize() failed (%s) **\n", DebugUtils::errorAsString(err));
        return 1;
      }
      break;
    }
#endif

#ifndef ASMJIT_NO_COMPILER
    case EmitterType::kCompiler: {
      printf("Using x86::Compiler:\n");
      x86::Compiler cc(&code);
      makeCompiledFunc(&cc);

      err = cc.finalize();
      if (err) {
        printf("** FAILURE: x86::Compiler::finalize() failed (%s) **\n", DebugUtils::errorAsString(err));
        return 1;
      }
      break;
    }
#endif

    default: {
      // Reached only for emitter types that were compiled out of this build
      // (ASMJIT_NO_BUILDER / ASMJIT_NO_COMPILER) or that are unknown here.
      printf("** FAILURE: Emitter type is not supported by this build **\n");
      return 1;
    }
  }

  // Add the code generated to the runtime.
  SumIntsFunc fn = nullptr;
  err = rt.add(&fn, &code);

  if (err) {
    printf("** FAILURE: JitRuntime::add() failed (%s) **\n", DebugUtils::errorAsString(err));
    return 1;
  }

  // Execute the generated function.
  int inA[4] = { 4, 3, 2, 1 };
  int inB[4] = { 1, 5, 2, 8 };
  int out[4] = { 0, 0, 0, 0 };
  fn(out, inA, inB);

  // Should print {5 8 4 9}.
  printf("Result = { %d %d %d %d }\n\n", out[0], out[1], out[2], out[3]);

  rt.release(fn);
  return !(out[0] == 5 && out[1] == 8 && out[2] == 4 && out[3] == 9);
}
|
|
164
|
+
|
|
165
|
+
// Entry point of the emitters test: runs `testFunc()` once per emitter type
// available in this build and reports how many runs failed.
//
// Returns 0 when every run succeeded, 1 otherwise.
int main() {
  printf("AsmJit Emitters Test-Suite v%u.%u.%u\n",
    unsigned((ASMJIT_LIBRARY_VERSION >> 16)       ),
    unsigned((ASMJIT_LIBRARY_VERSION >>  8) & 0xFF),
    unsigned((ASMJIT_LIBRARY_VERSION      ) & 0xFF));
  printf("\n");

  JitRuntime rt;
  unsigned nFailed = 0;

  nFailed += testFunc(rt, EmitterType::kAssembler);

#ifndef ASMJIT_NO_BUILDER
  nFailed += testFunc(rt, EmitterType::kBuilder);
#endif

#ifndef ASMJIT_NO_COMPILER
  nFailed += testFunc(rt, EmitterType::kCompiler);
#endif

  if (!nFailed)
    printf("** SUCCESS **\n");
  else
    printf("** FAILURE - %u %s failed ** \n", nFailed, nFailed == 1 ? "test" : "tests");

  return nFailed ? 1 : 0;
}
|
|
192
|
+
#else
|
|
193
|
+
// Fallback entry point used when the X86 backend is disabled or the host is
// not x86: there is nothing to execute, so only a notice is printed and the
// program reports success.
int main() {
  printf("AsmJit X86 Emitter Test is disabled on non-x86 host\n\n");
  return 0;
}
|
|
197
|
+
#endif // !ASMJIT_NO_X86 && ASMJIT_ARCH_X86
|
|
@@ -0,0 +1,181 @@
|
|
|
1
|
+
// This file is part of AsmJit project <https://asmjit.com>
|
|
2
|
+
//
|
|
3
|
+
// See asmjit.h or LICENSE.md for license and copyright information
|
|
4
|
+
// SPDX-License-Identifier: Zlib
|
|
5
|
+
|
|
6
|
+
#include <asmjit/core.h>
|
|
7
|
+
|
|
8
|
+
#if !defined(ASMJIT_NO_X86)
|
|
9
|
+
#include <asmjit/x86.h>
|
|
10
|
+
#endif
|
|
11
|
+
|
|
12
|
+
#include <stdio.h>
|
|
13
|
+
|
|
14
|
+
using namespace asmjit;
|
|
15
|
+
|
|
16
|
+
// Maps a (read, write) access pair to a single display character:
//   'X' - both read and written
//   'R' - read only
//   'W' - written only
//   '_' - neither
static char accessLetter(bool r, bool w) noexcept {
  if (r && w)
    return 'X';
  if (r)
    return 'R';
  if (w)
    return 'W';
  return '_';
}
|
|
19
|
+
|
|
20
|
+
// Prints what AsmJit's instruction API reports about a single instruction to
// stdout: the formatted instruction (when logging is compiled in), per-operand
// read/write information, the CPU flags it reads/writes, and the CPU features
// it requires.
//
//   arch     - target architecture passed through to the query functions.
//   inst     - instruction (id, options, extra register) to describe.
//   operands - array of `opCount` operands of the instruction.
//   opCount  - number of entries in `operands`.
//
// NOTE(review): the results of `queryRWInfo()` / `queryFeatures()` are not
// checked; confirm the output structures are well-defined when a query fails.
static void printInfo(Arch arch, const BaseInst& inst, const Operand_* operands, size_t opCount) {
  StringTmp<512> sb;

  // Read & Write Information
  // ------------------------

  InstRWInfo rw;
  InstAPI::queryRWInfo(arch, inst, operands, opCount, &rw);

#ifndef ASMJIT_NO_LOGGING
  Formatter::formatInstruction(sb, FormatFlags::kNone, nullptr, arch, inst, operands, opCount);
#else
  sb.append("<Logging-Not-Available>");
#endif
  sb.append("\n");

  sb.append(" Operands:\n");
  for (uint32_t i = 0; i < rw.opCount(); i++) {
    const OpRWInfo& op = rw.operand(i);

    // The masks are cast explicitly so that the argument type always matches
    // the `%llX` conversion, independently of how the mask type is defined.
    sb.appendFormat(" [%u] Op=%c Read=%016llX Write=%016llX Extend=%016llX",
                    i,
                    accessLetter(op.isRead(), op.isWrite()),
                    static_cast<unsigned long long>(op.readByteMask()),
                    static_cast<unsigned long long>(op.writeByteMask()),
                    static_cast<unsigned long long>(op.extendByteMask()));

    if (op.isMemBaseUsed()) {
      sb.appendFormat(" Base=%c", accessLetter(op.isMemBaseRead(), op.isMemBaseWrite()));
      // Constant text - no formatting required.
      if (op.isMemBasePreModify())
        sb.append(" <PRE>");
      if (op.isMemBasePostModify())
        sb.append(" <POST>");
    }

    if (op.isMemIndexUsed()) {
      sb.appendFormat(" Index=%c", accessLetter(op.isMemIndexRead(), op.isMemIndexWrite()));
    }

    sb.append("\n");
  }

  // CPU Flags (Read/Write)
  // ----------------------

  if ((rw.readFlags() | rw.writeFlags()) != CpuRWFlags::kNone) {
    sb.append(" Flags: \n");

    struct FlagMap {
      CpuRWFlags flag;
      char name[4];
    };

    static const FlagMap flagMap[] = {
      { CpuRWFlags::kX86_CF, "CF" },
      { CpuRWFlags::kX86_OF, "OF" },
      { CpuRWFlags::kX86_SF, "SF" },
      { CpuRWFlags::kX86_ZF, "ZF" },
      { CpuRWFlags::kX86_AF, "AF" },
      { CpuRWFlags::kX86_PF, "PF" },
      { CpuRWFlags::kX86_DF, "DF" },
      { CpuRWFlags::kX86_IF, "IF" },
      { CpuRWFlags::kX86_AC, "AC" },
      { CpuRWFlags::kX86_C0, "C0" },
      { CpuRWFlags::kX86_C1, "C1" },
      { CpuRWFlags::kX86_C2, "C2" },
      { CpuRWFlags::kX86_C3, "C3" }
    };

    sb.append(" ");
    // Iterate the table itself so the loop can never get out of sync with the
    // number of entries above. Flags that are neither read nor written are
    // omitted from the output.
    for (const FlagMap& entry : flagMap) {
      char c = accessLetter((rw.readFlags() & entry.flag) != CpuRWFlags::kNone,
                            (rw.writeFlags() & entry.flag) != CpuRWFlags::kNone);
      if (c != '_')
        sb.appendFormat("%s=%c ", entry.name, c);
    }

    sb.append("\n");
  }

  // CPU Features
  // ------------

  CpuFeatures features;
  InstAPI::queryFeatures(arch, inst, operands, opCount, &features);

#ifndef ASMJIT_NO_LOGGING
  if (!features.empty()) {
    sb.append(" Features:\n");
    sb.append(" ");

    // Feature names are joined by " & ".
    bool first = true;
    CpuFeatures::Iterator it(features.iterator());
    while (it.hasNext()) {
      uint32_t featureId = uint32_t(it.next());
      if (!first)
        sb.append(" & ");
      Formatter::formatFeature(sb, arch, featureId);
      first = false;
    }
    sb.append("\n");
  }
#endif

  printf("%s\n", sb.data());
}
|
|
126
|
+
|
|
127
|
+
template<typename... Args>
|
|
128
|
+
static void printInfoSimple(Arch arch,InstId instId, InstOptions options, Args&&... args) {
|
|
129
|
+
BaseInst inst(instId);
|
|
130
|
+
inst.addOptions(options);
|
|
131
|
+
Operand_ opArray[] = { std::forward<Args>(args)... };
|
|
132
|
+
printInfo(arch, inst, opArray, sizeof...(args));
|
|
133
|
+
}
|
|
134
|
+
|
|
135
|
+
template<typename... Args>
|
|
136
|
+
static void printInfoExtra(Arch arch, InstId instId, InstOptions options, const BaseReg& extraReg, Args&&... args) {
|
|
137
|
+
BaseInst inst(instId);
|
|
138
|
+
inst.addOptions(options);
|
|
139
|
+
inst.setExtraReg(extraReg);
|
|
140
|
+
Operand_ opArray[] = { std::forward<Args>(args)... };
|
|
141
|
+
printInfo(arch, inst, opArray, sizeof...(args));
|
|
142
|
+
}
|
|
143
|
+
|
|
144
|
+
static void testX86Arch() {
|
|
145
|
+
#if !defined(ASMJIT_NO_X86)
|
|
146
|
+
using namespace x86;
|
|
147
|
+
Arch arch = Arch::kX64;
|
|
148
|
+
|
|
149
|
+
printInfoSimple(arch, Inst::kIdAdd, InstOptions::kNone, eax, ebx);
|
|
150
|
+
printInfoSimple(arch, Inst::kIdLods, InstOptions::kNone, eax, dword_ptr(rsi));
|
|
151
|
+
|
|
152
|
+
printInfoSimple(arch, Inst::kIdPshufd, InstOptions::kNone, xmm0, xmm1, imm(0));
|
|
153
|
+
printInfoSimple(arch, Inst::kIdPabsb, InstOptions::kNone, mm1, mm2);
|
|
154
|
+
printInfoSimple(arch, Inst::kIdPabsb, InstOptions::kNone, xmm1, xmm2);
|
|
155
|
+
printInfoSimple(arch, Inst::kIdPextrw, InstOptions::kNone, eax, mm1, imm(0));
|
|
156
|
+
printInfoSimple(arch, Inst::kIdPextrw, InstOptions::kNone, eax, xmm1, imm(0));
|
|
157
|
+
printInfoSimple(arch, Inst::kIdPextrw, InstOptions::kNone, ptr(rax), xmm1, imm(0));
|
|
158
|
+
|
|
159
|
+
printInfoSimple(arch, Inst::kIdVpdpbusd, InstOptions::kNone, xmm0, xmm1, xmm2);
|
|
160
|
+
printInfoSimple(arch, Inst::kIdVpdpbusd, InstOptions::kX86_Vex, xmm0, xmm1, xmm2);
|
|
161
|
+
|
|
162
|
+
printInfoSimple(arch, Inst::kIdVaddpd, InstOptions::kNone, ymm0, ymm1, ymm2);
|
|
163
|
+
printInfoSimple(arch, Inst::kIdVaddpd, InstOptions::kNone, ymm0, ymm30, ymm31);
|
|
164
|
+
printInfoSimple(arch, Inst::kIdVaddpd, InstOptions::kNone, zmm0, zmm1, zmm2);
|
|
165
|
+
|
|
166
|
+
printInfoExtra(arch, Inst::kIdVaddpd, InstOptions::kNone, k1, zmm0, zmm1, zmm2);
|
|
167
|
+
printInfoExtra(arch, Inst::kIdVaddpd, InstOptions::kX86_ZMask, k1, zmm0, zmm1, zmm2);
|
|
168
|
+
#endif
|
|
169
|
+
}
|
|
170
|
+
|
|
171
|
+
// Entry point of the instruction info test: prints the library version and
// then the instruction information produced by `testX86Arch()`.
// Always returns 0 - this program only prints, it does not verify anything.
int main() {
  printf("AsmJit Instruction Info Test-Suite v%u.%u.%u\n",
    unsigned((ASMJIT_LIBRARY_VERSION >> 16)       ),
    unsigned((ASMJIT_LIBRARY_VERSION >>  8) & 0xFF),
    unsigned((ASMJIT_LIBRARY_VERSION      ) & 0xFF));
  printf("\n");

  testX86Arch();

  return 0;
}
|
|
@@ -0,0 +1,257 @@
|
|
|
1
|
+
// This file is part of AsmJit project <https://asmjit.com>
|
|
2
|
+
//
|
|
3
|
+
// See asmjit.h or LICENSE.md for license and copyright information
|
|
4
|
+
// SPDX-License-Identifier: Zlib
|
|
5
|
+
|
|
6
|
+
#ifndef ASMJIT_TEST_MISC_H_INCLUDED
|
|
7
|
+
#define ASMJIT_TEST_MISC_H_INCLUDED
|
|
8
|
+
|
|
9
|
+
#include <asmjit/x86.h>
|
|
10
|
+
|
|
11
|
+
namespace asmtest {
|
|
12
|
+
|
|
13
|
+
using namespace asmjit;
|
|
14
|
+
|
|
15
|
+
// Generates a typical alpha blend function that uses SSE2 instruction set.
|
|
16
|
+
// This function combines emitting instructions with control flow constructs
|
|
17
|
+
// like binding Labels and jumping to them. This should be pretty representative.
|
|
18
|
+
template<typename Emitter>
|
|
19
|
+
static void generateSseAlphaBlendInternal(
|
|
20
|
+
Emitter& cc,
|
|
21
|
+
const x86::Gp& dst, const x86::Gp& src, const x86::Gp& n,
|
|
22
|
+
const x86::Gp& gp0,
|
|
23
|
+
const x86::Xmm& simd0, const x86::Xmm& simd1, const x86::Xmm& simd2, const x86::Xmm& simd3,
|
|
24
|
+
const x86::Xmm& simd4, const x86::Xmm& simd5, const x86::Xmm& simd6, const x86::Xmm& simd7) {
|
|
25
|
+
|
|
26
|
+
x86::Gp i = n;
|
|
27
|
+
x86::Gp j = gp0;
|
|
28
|
+
|
|
29
|
+
x86::Xmm vzero = simd0;
|
|
30
|
+
x86::Xmm v0080 = simd1;
|
|
31
|
+
x86::Xmm v0101 = simd2;
|
|
32
|
+
|
|
33
|
+
Label L_SmallLoop = cc.newLabel();
|
|
34
|
+
Label L_SmallEnd = cc.newLabel();
|
|
35
|
+
Label L_LargeLoop = cc.newLabel();
|
|
36
|
+
Label L_LargeEnd = cc.newLabel();
|
|
37
|
+
Label L_Done = cc.newLabel();
|
|
38
|
+
|
|
39
|
+
// Load SIMD Constants.
|
|
40
|
+
cc.xorps(vzero, vzero);
|
|
41
|
+
cc.mov(gp0.r32(), 0x00800080);
|
|
42
|
+
cc.movd(v0080, gp0.r32());
|
|
43
|
+
cc.mov(gp0.r32(), 0x01010101);
|
|
44
|
+
cc.movd(v0101, gp0.r32());
|
|
45
|
+
cc.pshufd(v0080, v0080, x86::shuffleImm(0, 0, 0, 0));
|
|
46
|
+
cc.pshufd(v0101, v0101, x86::shuffleImm(0, 0, 0, 0));
|
|
47
|
+
|
|
48
|
+
// How many pixels have to be processed to make the loop aligned.
|
|
49
|
+
cc.xor_(j, j);
|
|
50
|
+
cc.sub(j, dst);
|
|
51
|
+
cc.and_(j, 15);
|
|
52
|
+
cc.shr(j, 2);
|
|
53
|
+
cc.jz(L_SmallEnd);
|
|
54
|
+
|
|
55
|
+
cc.cmp(j, i);
|
|
56
|
+
cc.cmovg(j, i); // j = min(i, j)
|
|
57
|
+
cc.sub(i, j); // i -= j
|
|
58
|
+
|
|
59
|
+
// Small loop.
|
|
60
|
+
cc.bind(L_SmallLoop);
|
|
61
|
+
{
|
|
62
|
+
x86::Xmm x0 = simd3;
|
|
63
|
+
x86::Xmm y0 = simd4;
|
|
64
|
+
x86::Xmm a0 = simd5;
|
|
65
|
+
|
|
66
|
+
cc.movd(y0, x86::ptr(src));
|
|
67
|
+
cc.movd(x0, x86::ptr(dst));
|
|
68
|
+
|
|
69
|
+
cc.pcmpeqb(a0, a0);
|
|
70
|
+
cc.pxor(a0, y0);
|
|
71
|
+
cc.psrlw(a0, 8);
|
|
72
|
+
cc.punpcklbw(x0, vzero);
|
|
73
|
+
|
|
74
|
+
cc.pshuflw(a0, a0, x86::shuffleImm(1, 1, 1, 1));
|
|
75
|
+
cc.punpcklbw(y0, vzero);
|
|
76
|
+
|
|
77
|
+
cc.pmullw(x0, a0);
|
|
78
|
+
cc.paddsw(x0, v0080);
|
|
79
|
+
cc.pmulhuw(x0, v0101);
|
|
80
|
+
|
|
81
|
+
cc.paddw(x0, y0);
|
|
82
|
+
cc.packuswb(x0, x0);
|
|
83
|
+
|
|
84
|
+
cc.movd(x86::ptr(dst), x0);
|
|
85
|
+
|
|
86
|
+
cc.add(dst, 4);
|
|
87
|
+
cc.add(src, 4);
|
|
88
|
+
|
|
89
|
+
cc.dec(j);
|
|
90
|
+
cc.jnz(L_SmallLoop);
|
|
91
|
+
}
|
|
92
|
+
|
|
93
|
+
// Second section, prepare for an aligned loop.
|
|
94
|
+
cc.bind(L_SmallEnd);
|
|
95
|
+
|
|
96
|
+
cc.test(i, i);
|
|
97
|
+
cc.mov(j, i);
|
|
98
|
+
cc.jz(L_Done);
|
|
99
|
+
|
|
100
|
+
cc.and_(j, 3);
|
|
101
|
+
cc.shr(i, 2);
|
|
102
|
+
cc.jz(L_LargeEnd);
|
|
103
|
+
|
|
104
|
+
// Aligned loop.
|
|
105
|
+
cc.bind(L_LargeLoop);
|
|
106
|
+
{
|
|
107
|
+
x86::Xmm x0 = simd3;
|
|
108
|
+
x86::Xmm x1 = simd4;
|
|
109
|
+
x86::Xmm y0 = simd5;
|
|
110
|
+
x86::Xmm a0 = simd6;
|
|
111
|
+
x86::Xmm a1 = simd7;
|
|
112
|
+
|
|
113
|
+
cc.movups(y0, x86::ptr(src));
|
|
114
|
+
cc.movaps(x0, x86::ptr(dst));
|
|
115
|
+
|
|
116
|
+
cc.pcmpeqb(a0, a0);
|
|
117
|
+
cc.xorps(a0, y0);
|
|
118
|
+
cc.movaps(x1, x0);
|
|
119
|
+
|
|
120
|
+
cc.psrlw(a0, 8);
|
|
121
|
+
cc.punpcklbw(x0, vzero);
|
|
122
|
+
|
|
123
|
+
cc.movaps(a1, a0);
|
|
124
|
+
cc.punpcklwd(a0, a0);
|
|
125
|
+
|
|
126
|
+
cc.punpckhbw(x1, vzero);
|
|
127
|
+
cc.punpckhwd(a1, a1);
|
|
128
|
+
|
|
129
|
+
cc.pshufd(a0, a0, x86::shuffleImm(3, 3, 1, 1));
|
|
130
|
+
cc.pshufd(a1, a1, x86::shuffleImm(3, 3, 1, 1));
|
|
131
|
+
|
|
132
|
+
cc.pmullw(x0, a0);
|
|
133
|
+
cc.pmullw(x1, a1);
|
|
134
|
+
|
|
135
|
+
cc.paddsw(x0, v0080);
|
|
136
|
+
cc.paddsw(x1, v0080);
|
|
137
|
+
|
|
138
|
+
cc.pmulhuw(x0, v0101);
|
|
139
|
+
cc.pmulhuw(x1, v0101);
|
|
140
|
+
|
|
141
|
+
cc.add(src, 16);
|
|
142
|
+
cc.packuswb(x0, x1);
|
|
143
|
+
|
|
144
|
+
cc.paddw(x0, y0);
|
|
145
|
+
cc.movaps(x86::ptr(dst), x0);
|
|
146
|
+
|
|
147
|
+
cc.add(dst, 16);
|
|
148
|
+
|
|
149
|
+
cc.dec(i);
|
|
150
|
+
cc.jnz(L_LargeLoop);
|
|
151
|
+
}
|
|
152
|
+
|
|
153
|
+
cc.bind(L_LargeEnd);
|
|
154
|
+
cc.test(j, j);
|
|
155
|
+
cc.jnz(L_SmallLoop);
|
|
156
|
+
|
|
157
|
+
cc.bind(L_Done);
|
|
158
|
+
}
|
|
159
|
+
|
|
160
|
+
static void generateSseAlphaBlend(asmjit::BaseEmitter& emitter, bool emitPrologEpilog) {
|
|
161
|
+
using namespace asmjit::x86;
|
|
162
|
+
|
|
163
|
+
if (emitter.isAssembler()) {
|
|
164
|
+
Assembler& cc = *emitter.as<Assembler>();
|
|
165
|
+
|
|
166
|
+
x86::Gp dst = cc.zax();
|
|
167
|
+
x86::Gp src = cc.zcx();
|
|
168
|
+
x86::Gp i = cc.zdx();
|
|
169
|
+
x86::Gp j = cc.zdi();
|
|
170
|
+
|
|
171
|
+
if (emitPrologEpilog) {
|
|
172
|
+
FuncDetail func;
|
|
173
|
+
func.init(FuncSignatureT<void, void*, const void*, size_t>(CallConvId::kHost), cc.environment());
|
|
174
|
+
|
|
175
|
+
FuncFrame frame;
|
|
176
|
+
frame.init(func);
|
|
177
|
+
frame.addDirtyRegs(dst, src, i, j);
|
|
178
|
+
frame.addDirtyRegs(xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7);
|
|
179
|
+
|
|
180
|
+
FuncArgsAssignment args(&func);
|
|
181
|
+
args.assignAll(dst, src, i);
|
|
182
|
+
args.updateFuncFrame(frame);
|
|
183
|
+
frame.finalize();
|
|
184
|
+
|
|
185
|
+
cc.emitProlog(frame);
|
|
186
|
+
cc.emitArgsAssignment(frame, args);
|
|
187
|
+
generateSseAlphaBlendInternal(cc, dst, src, i, j, xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7);
|
|
188
|
+
cc.emitEpilog(frame);
|
|
189
|
+
}
|
|
190
|
+
else {
|
|
191
|
+
generateSseAlphaBlendInternal(cc, dst, src, i, j, xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7);
|
|
192
|
+
}
|
|
193
|
+
}
|
|
194
|
+
#ifndef ASMJIT_NO_BUILDER
|
|
195
|
+
else if (emitter.isBuilder()) {
|
|
196
|
+
Builder& cc = *emitter.as<Builder>();
|
|
197
|
+
|
|
198
|
+
x86::Gp dst = cc.zax();
|
|
199
|
+
x86::Gp src = cc.zcx();
|
|
200
|
+
x86::Gp i = cc.zdx();
|
|
201
|
+
x86::Gp j = cc.zdi();
|
|
202
|
+
|
|
203
|
+
if (emitPrologEpilog) {
|
|
204
|
+
FuncDetail func;
|
|
205
|
+
func.init(FuncSignatureT<void, void*, const void*, size_t>(CallConvId::kHost), cc.environment());
|
|
206
|
+
|
|
207
|
+
FuncFrame frame;
|
|
208
|
+
frame.init(func);
|
|
209
|
+
frame.addDirtyRegs(dst, src, i, j);
|
|
210
|
+
frame.addDirtyRegs(xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7);
|
|
211
|
+
|
|
212
|
+
FuncArgsAssignment args(&func);
|
|
213
|
+
args.assignAll(dst, src, i);
|
|
214
|
+
args.updateFuncFrame(frame);
|
|
215
|
+
frame.finalize();
|
|
216
|
+
|
|
217
|
+
cc.emitProlog(frame);
|
|
218
|
+
cc.emitArgsAssignment(frame, args);
|
|
219
|
+
generateSseAlphaBlendInternal(cc, dst, src, i, j, xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7);
|
|
220
|
+
cc.emitEpilog(frame);
|
|
221
|
+
}
|
|
222
|
+
else {
|
|
223
|
+
generateSseAlphaBlendInternal(cc, dst, src, i, j, xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7);
|
|
224
|
+
}
|
|
225
|
+
}
|
|
226
|
+
#endif
|
|
227
|
+
#ifndef ASMJIT_NO_COMPILER
|
|
228
|
+
else if (emitter.isCompiler()) {
|
|
229
|
+
Compiler& cc = *emitter.as<Compiler>();
|
|
230
|
+
|
|
231
|
+
Gp dst = cc.newIntPtr("dst");
|
|
232
|
+
Gp src = cc.newIntPtr("src");
|
|
233
|
+
Gp i = cc.newIntPtr("i");
|
|
234
|
+
Gp j = cc.newIntPtr("j");
|
|
235
|
+
|
|
236
|
+
Xmm v0 = cc.newXmm("v0");
|
|
237
|
+
Xmm v1 = cc.newXmm("v1");
|
|
238
|
+
Xmm v2 = cc.newXmm("v2");
|
|
239
|
+
Xmm v3 = cc.newXmm("v3");
|
|
240
|
+
Xmm v4 = cc.newXmm("v4");
|
|
241
|
+
Xmm v5 = cc.newXmm("v5");
|
|
242
|
+
Xmm v6 = cc.newXmm("v6");
|
|
243
|
+
Xmm v7 = cc.newXmm("v7");
|
|
244
|
+
|
|
245
|
+
FuncNode* funcNode = cc.addFunc(FuncSignatureT<void, void*, const void*, size_t>(CallConvId::kHost));
|
|
246
|
+
funcNode->setArg(0, dst);
|
|
247
|
+
funcNode->setArg(1, src);
|
|
248
|
+
funcNode->setArg(2, i);
|
|
249
|
+
generateSseAlphaBlendInternal(cc, dst, src, i, j, v0, v1, v2, v3, v4, v5, v6, v7);
|
|
250
|
+
cc.endFunc();
|
|
251
|
+
}
|
|
252
|
+
#endif
|
|
253
|
+
}
|
|
254
|
+
|
|
255
|
+
} // {asmtest}
|
|
256
|
+
|
|
257
|
+
#endif // ASMJIT_TEST_MISC_H_INCLUDED
|
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
// This file is part of AsmJit project <https://asmjit.com>
|
|
2
|
+
//
|
|
3
|
+
// See asmjit.h or LICENSE.md for license and copyright information
|
|
4
|
+
// SPDX-License-Identifier: Zlib
|
|
5
|
+
|
|
6
|
+
#include <asmjit/core.h>
|
|
7
|
+
#include <stdio.h>
|
|
8
|
+
#include <stdlib.h>
|
|
9
|
+
#include <string.h>
|
|
10
|
+
|
|
11
|
+
#include "cmdline.h"
|
|
12
|
+
|
|
13
|
+
using namespace asmjit;
|
|
14
|
+
|
|
15
|
+
#if !defined(ASMJIT_NO_X86)
|
|
16
|
+
void benchmarkX86Emitters(uint32_t numIterations, bool testX86, bool testX64) noexcept;
|
|
17
|
+
#endif
|
|
18
|
+
|
|
19
|
+
#if !defined(ASMJIT_NO_AARCH64)
|
|
20
|
+
void benchmarkA64Emitters(uint32_t numIterations);
|
|
21
|
+
#endif
|
|
22
|
+
|
|
23
|
+
int main(int argc, char* argv[]) {
|
|
24
|
+
CmdLine cmdLine(argc, argv);
|
|
25
|
+
uint32_t numIterations = 20000;
|
|
26
|
+
|
|
27
|
+
printf("AsmJit Performance Suite v%u.%u.%u:\n\n",
|
|
28
|
+
unsigned((ASMJIT_LIBRARY_VERSION >> 16) ),
|
|
29
|
+
unsigned((ASMJIT_LIBRARY_VERSION >> 8) & 0xFF),
|
|
30
|
+
unsigned((ASMJIT_LIBRARY_VERSION ) & 0xFF));
|
|
31
|
+
|
|
32
|
+
printf("Usage:\n");
|
|
33
|
+
printf(" --help Show usage only\n");
|
|
34
|
+
printf(" --quick Decrease the number of iterations to make tests quicker\n");
|
|
35
|
+
printf(" --arch=<ARCH> Select architecture to run ('all' by default)\n");
|
|
36
|
+
printf("\n");
|
|
37
|
+
|
|
38
|
+
if (cmdLine.hasArg("--help"))
|
|
39
|
+
return 0;
|
|
40
|
+
|
|
41
|
+
if (cmdLine.hasArg("--quick"))
|
|
42
|
+
numIterations = 1000;
|
|
43
|
+
|
|
44
|
+
const char* arch = cmdLine.valueOf("--arch", "all");
|
|
45
|
+
|
|
46
|
+
#if !defined(ASMJIT_NO_X86)
|
|
47
|
+
bool testX86 = strcmp(arch, "all") == 0 || strcmp(arch, "x86") == 0;
|
|
48
|
+
bool testX64 = strcmp(arch, "all") == 0 || strcmp(arch, "x64") == 0;
|
|
49
|
+
|
|
50
|
+
if (testX86 || testX64)
|
|
51
|
+
benchmarkX86Emitters(numIterations, testX86, testX64);
|
|
52
|
+
#endif
|
|
53
|
+
|
|
54
|
+
#if !defined(ASMJIT_NO_AARCH64)
|
|
55
|
+
bool testAArch64 = strcmp(arch, "all") == 0 || strcmp(arch, "aarch64") == 0;
|
|
56
|
+
|
|
57
|
+
if (testAArch64)
|
|
58
|
+
benchmarkA64Emitters(numIterations);
|
|
59
|
+
#endif
|
|
60
|
+
|
|
61
|
+
return 0;
|
|
62
|
+
}
|