asmjit 0.2.0 → 0.2.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Gemfile.lock +1 -1
- data/asmjit.gemspec +1 -1
- data/ext/asmjit/asmjit/.editorconfig +10 -0
- data/ext/asmjit/asmjit/.github/FUNDING.yml +1 -0
- data/ext/asmjit/asmjit/.github/workflows/build-config.json +47 -0
- data/ext/asmjit/asmjit/.github/workflows/build.yml +156 -0
- data/ext/asmjit/asmjit/.gitignore +6 -0
- data/ext/asmjit/asmjit/CMakeLists.txt +611 -0
- data/ext/asmjit/asmjit/LICENSE.md +17 -0
- data/ext/asmjit/asmjit/README.md +69 -0
- data/ext/asmjit/asmjit/src/asmjit/a64.h +62 -0
- data/ext/asmjit/asmjit/src/asmjit/arm/a64archtraits_p.h +81 -0
- data/ext/asmjit/asmjit/src/asmjit/arm/a64assembler.cpp +5115 -0
- data/ext/asmjit/asmjit/src/asmjit/arm/a64assembler.h +72 -0
- data/ext/asmjit/asmjit/src/asmjit/arm/a64builder.cpp +51 -0
- data/ext/asmjit/asmjit/src/asmjit/arm/a64builder.h +57 -0
- data/ext/asmjit/asmjit/src/asmjit/arm/a64compiler.cpp +60 -0
- data/ext/asmjit/asmjit/src/asmjit/arm/a64compiler.h +247 -0
- data/ext/asmjit/asmjit/src/asmjit/arm/a64emithelper.cpp +464 -0
- data/ext/asmjit/asmjit/src/asmjit/arm/a64emithelper_p.h +50 -0
- data/ext/asmjit/asmjit/src/asmjit/arm/a64emitter.h +1228 -0
- data/ext/asmjit/asmjit/src/asmjit/arm/a64formatter.cpp +298 -0
- data/ext/asmjit/asmjit/src/asmjit/arm/a64formatter_p.h +59 -0
- data/ext/asmjit/asmjit/src/asmjit/arm/a64func.cpp +189 -0
- data/ext/asmjit/asmjit/src/asmjit/arm/a64func_p.h +33 -0
- data/ext/asmjit/asmjit/src/asmjit/arm/a64globals.h +1894 -0
- data/ext/asmjit/asmjit/src/asmjit/arm/a64instapi.cpp +278 -0
- data/ext/asmjit/asmjit/src/asmjit/arm/a64instapi_p.h +41 -0
- data/ext/asmjit/asmjit/src/asmjit/arm/a64instdb.cpp +1957 -0
- data/ext/asmjit/asmjit/src/asmjit/arm/a64instdb.h +74 -0
- data/ext/asmjit/asmjit/src/asmjit/arm/a64instdb_p.h +876 -0
- data/ext/asmjit/asmjit/src/asmjit/arm/a64operand.cpp +85 -0
- data/ext/asmjit/asmjit/src/asmjit/arm/a64operand.h +312 -0
- data/ext/asmjit/asmjit/src/asmjit/arm/a64rapass.cpp +852 -0
- data/ext/asmjit/asmjit/src/asmjit/arm/a64rapass_p.h +105 -0
- data/ext/asmjit/asmjit/src/asmjit/arm/a64utils.h +179 -0
- data/ext/asmjit/asmjit/src/asmjit/arm/armformatter.cpp +143 -0
- data/ext/asmjit/asmjit/src/asmjit/arm/armformatter_p.h +44 -0
- data/ext/asmjit/asmjit/src/asmjit/arm/armglobals.h +21 -0
- data/ext/asmjit/asmjit/src/asmjit/arm/armoperand.h +621 -0
- data/ext/asmjit/asmjit/src/asmjit/arm.h +62 -0
- data/ext/asmjit/asmjit/src/asmjit/asmjit-scope-begin.h +17 -0
- data/ext/asmjit/asmjit/src/asmjit/asmjit-scope-end.h +9 -0
- data/ext/asmjit/asmjit/src/asmjit/asmjit.h +33 -0
- data/ext/asmjit/asmjit/src/asmjit/core/api-build_p.h +55 -0
- data/ext/asmjit/asmjit/src/asmjit/core/api-config.h +613 -0
- data/ext/asmjit/asmjit/src/asmjit/core/archcommons.h +229 -0
- data/ext/asmjit/asmjit/src/asmjit/core/archtraits.cpp +160 -0
- data/ext/asmjit/asmjit/src/asmjit/core/archtraits.h +290 -0
- data/ext/asmjit/asmjit/src/asmjit/core/assembler.cpp +406 -0
- data/ext/asmjit/asmjit/src/asmjit/core/assembler.h +129 -0
- data/ext/asmjit/asmjit/src/asmjit/core/builder.cpp +889 -0
- data/ext/asmjit/asmjit/src/asmjit/core/builder.h +1391 -0
- data/ext/asmjit/asmjit/src/asmjit/core/codebuffer.h +113 -0
- data/ext/asmjit/asmjit/src/asmjit/core/codeholder.cpp +1149 -0
- data/ext/asmjit/asmjit/src/asmjit/core/codeholder.h +1035 -0
- data/ext/asmjit/asmjit/src/asmjit/core/codewriter.cpp +175 -0
- data/ext/asmjit/asmjit/src/asmjit/core/codewriter_p.h +179 -0
- data/ext/asmjit/asmjit/src/asmjit/core/compiler.cpp +582 -0
- data/ext/asmjit/asmjit/src/asmjit/core/compiler.h +737 -0
- data/ext/asmjit/asmjit/src/asmjit/core/compilerdefs.h +173 -0
- data/ext/asmjit/asmjit/src/asmjit/core/constpool.cpp +363 -0
- data/ext/asmjit/asmjit/src/asmjit/core/constpool.h +250 -0
- data/ext/asmjit/asmjit/src/asmjit/core/cpuinfo.cpp +1162 -0
- data/ext/asmjit/asmjit/src/asmjit/core/cpuinfo.h +813 -0
- data/ext/asmjit/asmjit/src/asmjit/core/emithelper.cpp +323 -0
- data/ext/asmjit/asmjit/src/asmjit/core/emithelper_p.h +58 -0
- data/ext/asmjit/asmjit/src/asmjit/core/emitter.cpp +333 -0
- data/ext/asmjit/asmjit/src/asmjit/core/emitter.h +741 -0
- data/ext/asmjit/asmjit/src/asmjit/core/emitterutils.cpp +129 -0
- data/ext/asmjit/asmjit/src/asmjit/core/emitterutils_p.h +89 -0
- data/ext/asmjit/asmjit/src/asmjit/core/environment.cpp +46 -0
- data/ext/asmjit/asmjit/src/asmjit/core/environment.h +508 -0
- data/ext/asmjit/asmjit/src/asmjit/core/errorhandler.cpp +14 -0
- data/ext/asmjit/asmjit/src/asmjit/core/errorhandler.h +228 -0
- data/ext/asmjit/asmjit/src/asmjit/core/formatter.cpp +584 -0
- data/ext/asmjit/asmjit/src/asmjit/core/formatter.h +247 -0
- data/ext/asmjit/asmjit/src/asmjit/core/formatter_p.h +34 -0
- data/ext/asmjit/asmjit/src/asmjit/core/func.cpp +286 -0
- data/ext/asmjit/asmjit/src/asmjit/core/func.h +1445 -0
- data/ext/asmjit/asmjit/src/asmjit/core/funcargscontext.cpp +293 -0
- data/ext/asmjit/asmjit/src/asmjit/core/funcargscontext_p.h +199 -0
- data/ext/asmjit/asmjit/src/asmjit/core/globals.cpp +133 -0
- data/ext/asmjit/asmjit/src/asmjit/core/globals.h +393 -0
- data/ext/asmjit/asmjit/src/asmjit/core/inst.cpp +113 -0
- data/ext/asmjit/asmjit/src/asmjit/core/inst.h +772 -0
- data/ext/asmjit/asmjit/src/asmjit/core/jitallocator.cpp +1242 -0
- data/ext/asmjit/asmjit/src/asmjit/core/jitallocator.h +261 -0
- data/ext/asmjit/asmjit/src/asmjit/core/jitruntime.cpp +80 -0
- data/ext/asmjit/asmjit/src/asmjit/core/jitruntime.h +89 -0
- data/ext/asmjit/asmjit/src/asmjit/core/logger.cpp +69 -0
- data/ext/asmjit/asmjit/src/asmjit/core/logger.h +198 -0
- data/ext/asmjit/asmjit/src/asmjit/core/misc_p.h +33 -0
- data/ext/asmjit/asmjit/src/asmjit/core/operand.cpp +132 -0
- data/ext/asmjit/asmjit/src/asmjit/core/operand.h +1611 -0
- data/ext/asmjit/asmjit/src/asmjit/core/osutils.cpp +84 -0
- data/ext/asmjit/asmjit/src/asmjit/core/osutils.h +61 -0
- data/ext/asmjit/asmjit/src/asmjit/core/osutils_p.h +68 -0
- data/ext/asmjit/asmjit/src/asmjit/core/raassignment_p.h +418 -0
- data/ext/asmjit/asmjit/src/asmjit/core/rabuilders_p.h +612 -0
- data/ext/asmjit/asmjit/src/asmjit/core/radefs_p.h +1204 -0
- data/ext/asmjit/asmjit/src/asmjit/core/ralocal.cpp +1166 -0
- data/ext/asmjit/asmjit/src/asmjit/core/ralocal_p.h +254 -0
- data/ext/asmjit/asmjit/src/asmjit/core/rapass.cpp +1969 -0
- data/ext/asmjit/asmjit/src/asmjit/core/rapass_p.h +1183 -0
- data/ext/asmjit/asmjit/src/asmjit/core/rastack.cpp +184 -0
- data/ext/asmjit/asmjit/src/asmjit/core/rastack_p.h +171 -0
- data/ext/asmjit/asmjit/src/asmjit/core/string.cpp +559 -0
- data/ext/asmjit/asmjit/src/asmjit/core/string.h +372 -0
- data/ext/asmjit/asmjit/src/asmjit/core/support.cpp +494 -0
- data/ext/asmjit/asmjit/src/asmjit/core/support.h +1773 -0
- data/ext/asmjit/asmjit/src/asmjit/core/target.cpp +14 -0
- data/ext/asmjit/asmjit/src/asmjit/core/target.h +53 -0
- data/ext/asmjit/asmjit/src/asmjit/core/type.cpp +74 -0
- data/ext/asmjit/asmjit/src/asmjit/core/type.h +419 -0
- data/ext/asmjit/asmjit/src/asmjit/core/virtmem.cpp +722 -0
- data/ext/asmjit/asmjit/src/asmjit/core/virtmem.h +242 -0
- data/ext/asmjit/asmjit/src/asmjit/core/zone.cpp +353 -0
- data/ext/asmjit/asmjit/src/asmjit/core/zone.h +615 -0
- data/ext/asmjit/asmjit/src/asmjit/core/zonehash.cpp +309 -0
- data/ext/asmjit/asmjit/src/asmjit/core/zonehash.h +186 -0
- data/ext/asmjit/asmjit/src/asmjit/core/zonelist.cpp +163 -0
- data/ext/asmjit/asmjit/src/asmjit/core/zonelist.h +209 -0
- data/ext/asmjit/asmjit/src/asmjit/core/zonestack.cpp +176 -0
- data/ext/asmjit/asmjit/src/asmjit/core/zonestack.h +239 -0
- data/ext/asmjit/asmjit/src/asmjit/core/zonestring.h +120 -0
- data/ext/asmjit/asmjit/src/asmjit/core/zonetree.cpp +99 -0
- data/ext/asmjit/asmjit/src/asmjit/core/zonetree.h +380 -0
- data/ext/asmjit/asmjit/src/asmjit/core/zonevector.cpp +356 -0
- data/ext/asmjit/asmjit/src/asmjit/core/zonevector.h +690 -0
- data/ext/asmjit/asmjit/src/asmjit/core.h +1861 -0
- data/ext/asmjit/asmjit/src/asmjit/x86/x86archtraits_p.h +148 -0
- data/ext/asmjit/asmjit/src/asmjit/x86/x86assembler.cpp +5110 -0
- data/ext/asmjit/asmjit/src/asmjit/x86/x86assembler.h +685 -0
- data/ext/asmjit/asmjit/src/asmjit/x86/x86builder.cpp +52 -0
- data/ext/asmjit/asmjit/src/asmjit/x86/x86builder.h +351 -0
- data/ext/asmjit/asmjit/src/asmjit/x86/x86compiler.cpp +61 -0
- data/ext/asmjit/asmjit/src/asmjit/x86/x86compiler.h +721 -0
- data/ext/asmjit/asmjit/src/asmjit/x86/x86emithelper.cpp +619 -0
- data/ext/asmjit/asmjit/src/asmjit/x86/x86emithelper_p.h +60 -0
- data/ext/asmjit/asmjit/src/asmjit/x86/x86emitter.h +4315 -0
- data/ext/asmjit/asmjit/src/asmjit/x86/x86formatter.cpp +944 -0
- data/ext/asmjit/asmjit/src/asmjit/x86/x86formatter_p.h +58 -0
- data/ext/asmjit/asmjit/src/asmjit/x86/x86func.cpp +503 -0
- data/ext/asmjit/asmjit/src/asmjit/x86/x86func_p.h +33 -0
- data/ext/asmjit/asmjit/src/asmjit/x86/x86globals.h +2169 -0
- data/ext/asmjit/asmjit/src/asmjit/x86/x86instapi.cpp +1732 -0
- data/ext/asmjit/asmjit/src/asmjit/x86/x86instapi_p.h +41 -0
- data/ext/asmjit/asmjit/src/asmjit/x86/x86instdb.cpp +4427 -0
- data/ext/asmjit/asmjit/src/asmjit/x86/x86instdb.h +563 -0
- data/ext/asmjit/asmjit/src/asmjit/x86/x86instdb_p.h +311 -0
- data/ext/asmjit/asmjit/src/asmjit/x86/x86opcode_p.h +436 -0
- data/ext/asmjit/asmjit/src/asmjit/x86/x86operand.cpp +231 -0
- data/ext/asmjit/asmjit/src/asmjit/x86/x86operand.h +1085 -0
- data/ext/asmjit/asmjit/src/asmjit/x86/x86rapass.cpp +1509 -0
- data/ext/asmjit/asmjit/src/asmjit/x86/x86rapass_p.h +94 -0
- data/ext/asmjit/asmjit/src/asmjit/x86.h +93 -0
- data/ext/asmjit/asmjit/src/asmjit.natvis +245 -0
- data/ext/asmjit/asmjit/test/asmjit_test_assembler.cpp +84 -0
- data/ext/asmjit/asmjit/test/asmjit_test_assembler.h +85 -0
- data/ext/asmjit/asmjit/test/asmjit_test_assembler_a64.cpp +4006 -0
- data/ext/asmjit/asmjit/test/asmjit_test_assembler_x64.cpp +17833 -0
- data/ext/asmjit/asmjit/test/asmjit_test_assembler_x86.cpp +8300 -0
- data/ext/asmjit/asmjit/test/asmjit_test_compiler.cpp +253 -0
- data/ext/asmjit/asmjit/test/asmjit_test_compiler.h +73 -0
- data/ext/asmjit/asmjit/test/asmjit_test_compiler_a64.cpp +690 -0
- data/ext/asmjit/asmjit/test/asmjit_test_compiler_x86.cpp +4317 -0
- data/ext/asmjit/asmjit/test/asmjit_test_emitters.cpp +197 -0
- data/ext/asmjit/asmjit/test/asmjit_test_instinfo.cpp +181 -0
- data/ext/asmjit/asmjit/test/asmjit_test_misc.h +257 -0
- data/ext/asmjit/asmjit/test/asmjit_test_perf.cpp +62 -0
- data/ext/asmjit/asmjit/test/asmjit_test_perf.h +61 -0
- data/ext/asmjit/asmjit/test/asmjit_test_perf_a64.cpp +699 -0
- data/ext/asmjit/asmjit/test/asmjit_test_perf_x86.cpp +5032 -0
- data/ext/asmjit/asmjit/test/asmjit_test_unit.cpp +172 -0
- data/ext/asmjit/asmjit/test/asmjit_test_x86_sections.cpp +172 -0
- data/ext/asmjit/asmjit/test/asmjitutils.h +38 -0
- data/ext/asmjit/asmjit/test/broken.cpp +312 -0
- data/ext/asmjit/asmjit/test/broken.h +148 -0
- data/ext/asmjit/asmjit/test/cmdline.h +61 -0
- data/ext/asmjit/asmjit/test/performancetimer.h +41 -0
- data/ext/asmjit/asmjit/tools/configure-makefiles.sh +13 -0
- data/ext/asmjit/asmjit/tools/configure-ninja.sh +13 -0
- data/ext/asmjit/asmjit/tools/configure-sanitizers.sh +13 -0
- data/ext/asmjit/asmjit/tools/configure-vs2019-x64.bat +2 -0
- data/ext/asmjit/asmjit/tools/configure-vs2019-x86.bat +2 -0
- data/ext/asmjit/asmjit/tools/configure-vs2022-x64.bat +2 -0
- data/ext/asmjit/asmjit/tools/configure-vs2022-x86.bat +2 -0
- data/ext/asmjit/asmjit/tools/configure-xcode.sh +8 -0
- data/ext/asmjit/asmjit/tools/enumgen.js +417 -0
- data/ext/asmjit/asmjit/tools/enumgen.sh +3 -0
- data/ext/asmjit/asmjit/tools/tablegen-arm.js +365 -0
- data/ext/asmjit/asmjit/tools/tablegen-arm.sh +3 -0
- data/ext/asmjit/asmjit/tools/tablegen-x86.js +2638 -0
- data/ext/asmjit/asmjit/tools/tablegen-x86.sh +3 -0
- data/ext/asmjit/asmjit/tools/tablegen.js +947 -0
- data/ext/asmjit/asmjit/tools/tablegen.sh +4 -0
- data/ext/asmjit/asmjit.cc +18 -0
- data/lib/asmjit/version.rb +1 -1
- metadata +197 -2
@@ -0,0 +1,197 @@
|
|
1
|
+
// This file is part of AsmJit project <https://asmjit.com>
|
2
|
+
//
|
3
|
+
// See asmjit.h or LICENSE.md for license and copyright information
|
4
|
+
// SPDX-License-Identifier: Zlib
|
5
|
+
|
6
|
+
#include <asmjit/core.h>
|
7
|
+
|
8
|
+
#if !defined(ASMJIT_NO_X86) && ASMJIT_ARCH_X86
|
9
|
+
#include <asmjit/x86.h>
|
10
|
+
|
11
|
+
#include <stdio.h>
|
12
|
+
#include <stdlib.h>
|
13
|
+
#include <string.h>
|
14
|
+
|
15
|
+
using namespace asmjit;
|
16
|
+
|
17
|
+
// Signature of the generated function.
|
18
|
+
typedef void (*SumIntsFunc)(int* dst, const int* a, const int* b);
|
19
|
+
|
20
|
+
// This function works with both x86::Assembler and x86::Builder. It shows how
|
21
|
+
// `x86::Emitter` can be used to make your code more generic.
|
22
|
+
// Emits a complete function through the generic `x86::Emitter` interface, so
// the same code path can be driven by x86::Assembler and x86::Builder.
//
// The emitted function has the signature `void(int* dst, const int* a,
// const int* b)`: it loads four ints from `a` and four from `b`, adds them
// lane by lane, and stores the four sums to `dst`.
//
// Prolog, argument assignment, and epilog are derived from `FuncDetail`,
// `FuncFrame`, and `FuncArgsAssignment`. Results of the init/emit calls are
// not checked in this function.
static void makeRawFunc(x86::Emitter* emitter) noexcept {
  // Decide which registers will be mapped to function arguments. Try changing
  // registers of `dst`, `src_a`, and `src_b` and see what happens in function's
  // prolog and epilog.
  x86::Gp dst   = emitter->zax();
  x86::Gp src_a = emitter->zcx();
  x86::Gp src_b = emitter->zdx();

  // Decide which vector registers to use. We use these to keep the code generic,
  // you can switch to any other registers when needed.
  x86::Xmm vec0 = x86::xmm0;
  x86::Xmm vec1 = x86::xmm1;

  // Create and initialize `FuncDetail` and `FuncFrame`.
  FuncDetail func;
  func.init(FuncSignatureT<void, int*, const int*, const int*>(CallConvId::kHost), emitter->environment());

  FuncFrame frame;
  frame.init(func);

  // Mark the vector registers used below as dirty. `vec0`/`vec1` are passed
  // (instead of hard-coded xmm0/xmm1) so the frame stays in sync when the
  // register choice above is changed. VEC group includes XMM|YMM|ZMM registers.
  frame.addDirtyRegs(vec0, vec1);

  FuncArgsAssignment args(&func);         // Create arguments assignment context.
  args.assignAll(dst, src_a, src_b);      // Assign our registers to arguments.
  args.updateFuncFrame(frame);            // Reflect our args in FuncFrame.
  frame.finalize();

  // Emit prolog and allocate arguments to registers.
  emitter->emitProlog(frame);
  emitter->emitArgsAssignment(frame, args);

  emitter->movdqu(vec0, x86::ptr(src_a)); // Load 4 ints from [src_a] to vec0.
  emitter->movdqu(vec1, x86::ptr(src_b)); // Load 4 ints from [src_b] to vec1.

  emitter->paddd(vec0, vec1);             // Add 4 ints in vec1 to vec0.
  emitter->movdqu(x86::ptr(dst), vec0);   // Store the result to [dst].

  // Emit epilog and return.
  emitter->emitEpilog(frame);
}
|
63
|
+
|
64
|
+
#ifndef ASMJIT_NO_COMPILER
|
65
|
+
// This function works with x86::Compiler, provided for comparison.
|
66
|
+
// x86::Compiler counterpart of the raw-emitter version, provided for
// comparison. It builds the same `void(int* dst, const int* a, const int* b)`
// function, but uses virtual registers created by the compiler instead of
// hand-picked physical registers and an explicit prolog/epilog.
static void makeCompiledFunc(x86::Compiler* cc) noexcept {
  // Virtual registers; the string is the name given to each register.
  x86::Gp dst   = cc->newIntPtr("dst");
  x86::Gp src_a = cc->newIntPtr("src_a");
  x86::Gp src_b = cc->newIntPtr("src_b");
  x86::Xmm vec0 = cc->newXmm("vec0");
  x86::Xmm vec1 = cc->newXmm("vec1");

  // Open the function and bind its three arguments to the virtual registers.
  FuncNode* funcNode = cc->addFunc(FuncSignatureT<void, int*, const int*, const int*>(CallConvId::kHost));
  funcNode->setArg(0, dst);
  funcNode->setArg(1, src_a);
  funcNode->setArg(2, src_b);

  // Load both inputs, add them lane by lane, and store the result to [dst].
  cc->movdqu(vec0, x86::ptr(src_a));
  cc->movdqu(vec1, x86::ptr(src_b));
  cc->paddd(vec0, vec1);
  cc->movdqu(x86::ptr(dst), vec0);

  cc->endFunc();
}
|
84
|
+
#endif
|
85
|
+
|
86
|
+
// Generates the sum-of-ints function with the emitter selected by
// `emitterType`, adds it to `rt`, executes it on fixed inputs, and verifies
// the output.
//
// Returns 0 when the generated function produced { 5 8 4 9 } and 1 when
// finalization, `JitRuntime::add()`, or the result check failed.
static uint32_t testFunc(JitRuntime& rt, EmitterType emitterType) noexcept {
#ifndef ASMJIT_NO_LOGGING
  FileLogger logger(stdout);
  logger.setIndentation(FormatIndentationGroup::kCode, 2);
#endif

  CodeHolder code;
  code.init(rt.environment());

#ifndef ASMJIT_NO_LOGGING
  code.setLogger(&logger);
#endif

  // Each emitter lives in the scope of its own `case`, so it is destroyed
  // before the code is handed to the runtime below.
  Error err = kErrorOk;
  switch (emitterType) {
    case EmitterType::kNone: {
      break;
    }

    case EmitterType::kAssembler: {
      printf("Using x86::Assembler:\n");
      x86::Assembler a(&code);
      makeRawFunc(a.as<x86::Emitter>());
      break;
    }

#ifndef ASMJIT_NO_BUILDER
    case EmitterType::kBuilder: {
      printf("Using x86::Builder:\n");
      x86::Builder cb(&code);
      makeRawFunc(cb.as<x86::Emitter>());

      err = cb.finalize();
      if (err) {
        printf("** FAILURE: x86::Builder::finalize() failed (%s) **\n", DebugUtils::errorAsString(err));
        return 1;
      }
      break;
    }
#endif

#ifndef ASMJIT_NO_COMPILER
    case EmitterType::kCompiler: {
      printf("Using x86::Compiler:\n");
      x86::Compiler cc(&code);
      makeCompiledFunc(&cc);

      err = cc.finalize();
      if (err) {
        printf("** FAILURE: x86::Compiler::finalize() failed (%s) **\n", DebugUtils::errorAsString(err));
        return 1;
      }
      break;
    }
#endif
  }

  // Add the code generated to the runtime. `fn` starts as null so it never
  // holds an indeterminate pointer.
  SumIntsFunc fn = nullptr;
  err = rt.add(&fn, &code);

  if (err) {
    printf("** FAILURE: JitRuntime::add() failed (%s) **\n", DebugUtils::errorAsString(err));
    return 1;
  }

  // Execute the generated function. `out` is zero-initialized so the check
  // below reads defined values even if the generated code stores nothing.
  int inA[4] = { 4, 3, 2, 1 };
  int inB[4] = { 1, 5, 2, 8 };
  int out[4] = { 0, 0, 0, 0 };
  fn(out, inA, inB);

  // Should print {5 8 4 9}.
  printf("Result = { %d %d %d %d }\n\n", out[0], out[1], out[2], out[3]);

  rt.release(fn);
  return !(out[0] == 5 && out[1] == 8 && out[2] == 4 && out[3] == 9);
}
|
164
|
+
|
165
|
+
// Entry point of the emitters test: prints the library version, runs the
// test once per available emitter (Assembler always; Builder and Compiler
// unless disabled at build time), and reports how many runs failed.
//
// Returns 0 when every run succeeded and 1 otherwise.
int main() {
  printf("AsmJit Emitters Test-Suite v%u.%u.%u\n",
    unsigned((ASMJIT_LIBRARY_VERSION >> 16)),
    unsigned((ASMJIT_LIBRARY_VERSION >> 8) & 0xFF),
    unsigned((ASMJIT_LIBRARY_VERSION) & 0xFF));
  printf("\n");

  JitRuntime rt;
  unsigned nFailed = 0;

  nFailed += testFunc(rt, EmitterType::kAssembler);

#ifndef ASMJIT_NO_BUILDER
  nFailed += testFunc(rt, EmitterType::kBuilder);
#endif

#ifndef ASMJIT_NO_COMPILER
  nFailed += testFunc(rt, EmitterType::kCompiler);
#endif

  if (!nFailed)
    printf("** SUCCESS **\n");
  else
    printf("** FAILURE - %u %s failed ** \n", nFailed, nFailed == 1 ? "test" : "tests");

  return nFailed ? 1 : 0;
}
|
192
|
+
#else
|
193
|
+
// Fallback entry point compiled when the X86 backend is disabled or the host
// is not x86: prints a notice and reports success.
int main() {
  printf("AsmJit X86 Emitter Test is disabled on non-x86 host\n\n");
  return 0;
}
|
197
|
+
#endif // !ASMJIT_NO_X86 && ASMJIT_ARCH_X86
|
@@ -0,0 +1,181 @@
|
|
1
|
+
// This file is part of AsmJit project <https://asmjit.com>
|
2
|
+
//
|
3
|
+
// See asmjit.h or LICENSE.md for license and copyright information
|
4
|
+
// SPDX-License-Identifier: Zlib
|
5
|
+
|
6
|
+
#include <asmjit/core.h>
|
7
|
+
|
8
|
+
#if !defined(ASMJIT_NO_X86)
|
9
|
+
#include <asmjit/x86.h>
|
10
|
+
#endif
|
11
|
+
|
12
|
+
#include <stdio.h>
|
13
|
+
|
14
|
+
using namespace asmjit;
|
15
|
+
|
16
|
+
// Maps a read/write access pair to a one-letter code:
// 'X' = read and written, 'R' = read only, 'W' = written only, '_' = neither.
static char accessLetter(bool r, bool w) noexcept {
  return r && w ? 'X' : r ? 'R' : w ? 'W' : '_';
}
|
19
|
+
|
20
|
+
// Prints a textual report about a single instruction to stdout.
//
// The report contains the formatted instruction (when logging is compiled
// in), the per-operand read/write information returned by
// `InstAPI::queryRWInfo()`, the CPU flags the instruction reads or writes,
// and the CPU features reported by `InstAPI::queryFeatures()`.
//
//   arch     - architecture used for the queries and for formatting.
//   inst     - instruction id together with its options / extra register.
//   operands - array of `opCount` operands of the instruction.
//   opCount  - number of entries in `operands`.
//
// The return values of both queries are not checked.
static void printInfo(Arch arch, const BaseInst& inst, const Operand_* operands, size_t opCount) {
  StringTmp<512> sb;

  // Read & Write Information
  // ------------------------

  InstRWInfo rw;
  InstAPI::queryRWInfo(arch, inst, operands, opCount, &rw);

#ifndef ASMJIT_NO_LOGGING
  Formatter::formatInstruction(sb, FormatFlags::kNone, nullptr, arch, inst, operands, opCount);
#else
  sb.append("<Logging-Not-Available>");
#endif
  sb.append("\n");

  sb.append(" Operands:\n");
  for (uint32_t i = 0; i < rw.opCount(); i++) {
    const OpRWInfo& op = rw.operand(i);

    // The byte masks are printed with `%llX`, so convert them explicitly to
    // `unsigned long long` to match the format specifier on every platform.
    sb.appendFormat(" [%u] Op=%c Read=%016llX Write=%016llX Extend=%016llX",
                    i,
                    accessLetter(op.isRead(), op.isWrite()),
                    static_cast<unsigned long long>(op.readByteMask()),
                    static_cast<unsigned long long>(op.writeByteMask()),
                    static_cast<unsigned long long>(op.extendByteMask()));

    if (op.isMemBaseUsed()) {
      sb.appendFormat(" Base=%c", accessLetter(op.isMemBaseRead(), op.isMemBaseWrite()));
      if (op.isMemBasePreModify())
        sb.appendFormat(" <PRE>");
      if (op.isMemBasePostModify())
        sb.appendFormat(" <POST>");
    }

    if (op.isMemIndexUsed()) {
      sb.appendFormat(" Index=%c", accessLetter(op.isMemIndexRead(), op.isMemIndexWrite()));
    }

    sb.append("\n");
  }

  // CPU Flags (Read/Write)
  // ----------------------

  if ((rw.readFlags() | rw.writeFlags()) != CpuRWFlags::kNone) {
    sb.append(" Flags: \n");

    struct FlagMap {
      CpuRWFlags flag;
      char name[4];
    };

    static const FlagMap flagMap[] = {
      { CpuRWFlags::kX86_CF, "CF" },
      { CpuRWFlags::kX86_OF, "OF" },
      { CpuRWFlags::kX86_SF, "SF" },
      { CpuRWFlags::kX86_ZF, "ZF" },
      { CpuRWFlags::kX86_AF, "AF" },
      { CpuRWFlags::kX86_PF, "PF" },
      { CpuRWFlags::kX86_DF, "DF" },
      { CpuRWFlags::kX86_IF, "IF" },
      { CpuRWFlags::kX86_AC, "AC" },
      { CpuRWFlags::kX86_C0, "C0" },
      { CpuRWFlags::kX86_C1, "C1" },
      { CpuRWFlags::kX86_C2, "C2" },
      { CpuRWFlags::kX86_C3, "C3" }
    };

    sb.append(" ");
    // Iterate over the table itself so the loop cannot go out of sync with
    // the number of entries. Flags that are neither read nor written are
    // omitted from the output.
    for (const FlagMap& entry : flagMap) {
      char c = accessLetter((rw.readFlags() & entry.flag) != CpuRWFlags::kNone,
                            (rw.writeFlags() & entry.flag) != CpuRWFlags::kNone);
      if (c != '_')
        sb.appendFormat("%s=%c ", entry.name, c);
    }

    sb.append("\n");
  }

  // CPU Features
  // ------------

  CpuFeatures features;
  InstAPI::queryFeatures(arch, inst, operands, opCount, &features);

#ifndef ASMJIT_NO_LOGGING
  if (!features.empty()) {
    sb.append(" Features:\n");
    sb.append(" ");

    // Features are joined with " & ".
    bool first = true;
    CpuFeatures::Iterator it(features.iterator());
    while (it.hasNext()) {
      uint32_t featureId = uint32_t(it.next());
      if (!first)
        sb.append(" & ");
      Formatter::formatFeature(sb, arch, featureId);
      first = false;
    }
    sb.append("\n");
  }
#endif

  printf("%s\n", sb.data());
}
|
126
|
+
|
127
|
+
template<typename... Args>
|
128
|
+
static void printInfoSimple(Arch arch,InstId instId, InstOptions options, Args&&... args) {
|
129
|
+
BaseInst inst(instId);
|
130
|
+
inst.addOptions(options);
|
131
|
+
Operand_ opArray[] = { std::forward<Args>(args)... };
|
132
|
+
printInfo(arch, inst, opArray, sizeof...(args));
|
133
|
+
}
|
134
|
+
|
135
|
+
template<typename... Args>
|
136
|
+
static void printInfoExtra(Arch arch, InstId instId, InstOptions options, const BaseReg& extraReg, Args&&... args) {
|
137
|
+
BaseInst inst(instId);
|
138
|
+
inst.addOptions(options);
|
139
|
+
inst.setExtraReg(extraReg);
|
140
|
+
Operand_ opArray[] = { std::forward<Args>(args)... };
|
141
|
+
printInfo(arch, inst, opArray, sizeof...(args));
|
142
|
+
}
|
143
|
+
|
144
|
+
static void testX86Arch() {
|
145
|
+
#if !defined(ASMJIT_NO_X86)
|
146
|
+
using namespace x86;
|
147
|
+
Arch arch = Arch::kX64;
|
148
|
+
|
149
|
+
printInfoSimple(arch, Inst::kIdAdd, InstOptions::kNone, eax, ebx);
|
150
|
+
printInfoSimple(arch, Inst::kIdLods, InstOptions::kNone, eax, dword_ptr(rsi));
|
151
|
+
|
152
|
+
printInfoSimple(arch, Inst::kIdPshufd, InstOptions::kNone, xmm0, xmm1, imm(0));
|
153
|
+
printInfoSimple(arch, Inst::kIdPabsb, InstOptions::kNone, mm1, mm2);
|
154
|
+
printInfoSimple(arch, Inst::kIdPabsb, InstOptions::kNone, xmm1, xmm2);
|
155
|
+
printInfoSimple(arch, Inst::kIdPextrw, InstOptions::kNone, eax, mm1, imm(0));
|
156
|
+
printInfoSimple(arch, Inst::kIdPextrw, InstOptions::kNone, eax, xmm1, imm(0));
|
157
|
+
printInfoSimple(arch, Inst::kIdPextrw, InstOptions::kNone, ptr(rax), xmm1, imm(0));
|
158
|
+
|
159
|
+
printInfoSimple(arch, Inst::kIdVpdpbusd, InstOptions::kNone, xmm0, xmm1, xmm2);
|
160
|
+
printInfoSimple(arch, Inst::kIdVpdpbusd, InstOptions::kX86_Vex, xmm0, xmm1, xmm2);
|
161
|
+
|
162
|
+
printInfoSimple(arch, Inst::kIdVaddpd, InstOptions::kNone, ymm0, ymm1, ymm2);
|
163
|
+
printInfoSimple(arch, Inst::kIdVaddpd, InstOptions::kNone, ymm0, ymm30, ymm31);
|
164
|
+
printInfoSimple(arch, Inst::kIdVaddpd, InstOptions::kNone, zmm0, zmm1, zmm2);
|
165
|
+
|
166
|
+
printInfoExtra(arch, Inst::kIdVaddpd, InstOptions::kNone, k1, zmm0, zmm1, zmm2);
|
167
|
+
printInfoExtra(arch, Inst::kIdVaddpd, InstOptions::kX86_ZMask, k1, zmm0, zmm1, zmm2);
|
168
|
+
#endif
|
169
|
+
}
|
170
|
+
|
171
|
+
// Entry point of the instruction-info test: prints the library version and
// then the instruction reports for the X86 architecture. Always returns 0.
int main() {
  printf("AsmJit Instruction Info Test-Suite v%u.%u.%u\n",
    unsigned((ASMJIT_LIBRARY_VERSION >> 16)),
    unsigned((ASMJIT_LIBRARY_VERSION >> 8) & 0xFF),
    unsigned((ASMJIT_LIBRARY_VERSION) & 0xFF));
  printf("\n");

  testX86Arch();

  return 0;
}
|
@@ -0,0 +1,257 @@
|
|
1
|
+
// This file is part of AsmJit project <https://asmjit.com>
|
2
|
+
//
|
3
|
+
// See asmjit.h or LICENSE.md for license and copyright information
|
4
|
+
// SPDX-License-Identifier: Zlib
|
5
|
+
|
6
|
+
#ifndef ASMJIT_TEST_MISC_H_INCLUDED
|
7
|
+
#define ASMJIT_TEST_MISC_H_INCLUDED
|
8
|
+
|
9
|
+
#include <asmjit/x86.h>
|
10
|
+
|
11
|
+
namespace asmtest {
|
12
|
+
|
13
|
+
using namespace asmjit;
|
14
|
+
|
15
|
+
// Generates a typical alpha blend function that uses SSE2 instruction set.
|
16
|
+
// This function combines emitting instructions with control flow constructs
|
17
|
+
// like binding Labels and jumping to them. This should be pretty representative.
|
18
|
+
template<typename Emitter>
|
19
|
+
static void generateSseAlphaBlendInternal(
|
20
|
+
Emitter& cc,
|
21
|
+
const x86::Gp& dst, const x86::Gp& src, const x86::Gp& n,
|
22
|
+
const x86::Gp& gp0,
|
23
|
+
const x86::Xmm& simd0, const x86::Xmm& simd1, const x86::Xmm& simd2, const x86::Xmm& simd3,
|
24
|
+
const x86::Xmm& simd4, const x86::Xmm& simd5, const x86::Xmm& simd6, const x86::Xmm& simd7) {
|
25
|
+
|
26
|
+
x86::Gp i = n;
|
27
|
+
x86::Gp j = gp0;
|
28
|
+
|
29
|
+
x86::Xmm vzero = simd0;
|
30
|
+
x86::Xmm v0080 = simd1;
|
31
|
+
x86::Xmm v0101 = simd2;
|
32
|
+
|
33
|
+
Label L_SmallLoop = cc.newLabel();
|
34
|
+
Label L_SmallEnd = cc.newLabel();
|
35
|
+
Label L_LargeLoop = cc.newLabel();
|
36
|
+
Label L_LargeEnd = cc.newLabel();
|
37
|
+
Label L_Done = cc.newLabel();
|
38
|
+
|
39
|
+
// Load SIMD Constants.
|
40
|
+
cc.xorps(vzero, vzero);
|
41
|
+
cc.mov(gp0.r32(), 0x00800080);
|
42
|
+
cc.movd(v0080, gp0.r32());
|
43
|
+
cc.mov(gp0.r32(), 0x01010101);
|
44
|
+
cc.movd(v0101, gp0.r32());
|
45
|
+
cc.pshufd(v0080, v0080, x86::shuffleImm(0, 0, 0, 0));
|
46
|
+
cc.pshufd(v0101, v0101, x86::shuffleImm(0, 0, 0, 0));
|
47
|
+
|
48
|
+
// How many pixels have to be processed to make the loop aligned.
|
49
|
+
cc.xor_(j, j);
|
50
|
+
cc.sub(j, dst);
|
51
|
+
cc.and_(j, 15);
|
52
|
+
cc.shr(j, 2);
|
53
|
+
cc.jz(L_SmallEnd);
|
54
|
+
|
55
|
+
cc.cmp(j, i);
|
56
|
+
cc.cmovg(j, i); // j = min(i, j)
|
57
|
+
cc.sub(i, j); // i -= j
|
58
|
+
|
59
|
+
// Small loop.
|
60
|
+
cc.bind(L_SmallLoop);
|
61
|
+
{
|
62
|
+
x86::Xmm x0 = simd3;
|
63
|
+
x86::Xmm y0 = simd4;
|
64
|
+
x86::Xmm a0 = simd5;
|
65
|
+
|
66
|
+
cc.movd(y0, x86::ptr(src));
|
67
|
+
cc.movd(x0, x86::ptr(dst));
|
68
|
+
|
69
|
+
cc.pcmpeqb(a0, a0);
|
70
|
+
cc.pxor(a0, y0);
|
71
|
+
cc.psrlw(a0, 8);
|
72
|
+
cc.punpcklbw(x0, vzero);
|
73
|
+
|
74
|
+
cc.pshuflw(a0, a0, x86::shuffleImm(1, 1, 1, 1));
|
75
|
+
cc.punpcklbw(y0, vzero);
|
76
|
+
|
77
|
+
cc.pmullw(x0, a0);
|
78
|
+
cc.paddsw(x0, v0080);
|
79
|
+
cc.pmulhuw(x0, v0101);
|
80
|
+
|
81
|
+
cc.paddw(x0, y0);
|
82
|
+
cc.packuswb(x0, x0);
|
83
|
+
|
84
|
+
cc.movd(x86::ptr(dst), x0);
|
85
|
+
|
86
|
+
cc.add(dst, 4);
|
87
|
+
cc.add(src, 4);
|
88
|
+
|
89
|
+
cc.dec(j);
|
90
|
+
cc.jnz(L_SmallLoop);
|
91
|
+
}
|
92
|
+
|
93
|
+
// Second section, prepare for an aligned loop.
|
94
|
+
cc.bind(L_SmallEnd);
|
95
|
+
|
96
|
+
cc.test(i, i);
|
97
|
+
cc.mov(j, i);
|
98
|
+
cc.jz(L_Done);
|
99
|
+
|
100
|
+
cc.and_(j, 3);
|
101
|
+
cc.shr(i, 2);
|
102
|
+
cc.jz(L_LargeEnd);
|
103
|
+
|
104
|
+
// Aligned loop.
|
105
|
+
cc.bind(L_LargeLoop);
|
106
|
+
{
|
107
|
+
x86::Xmm x0 = simd3;
|
108
|
+
x86::Xmm x1 = simd4;
|
109
|
+
x86::Xmm y0 = simd5;
|
110
|
+
x86::Xmm a0 = simd6;
|
111
|
+
x86::Xmm a1 = simd7;
|
112
|
+
|
113
|
+
cc.movups(y0, x86::ptr(src));
|
114
|
+
cc.movaps(x0, x86::ptr(dst));
|
115
|
+
|
116
|
+
cc.pcmpeqb(a0, a0);
|
117
|
+
cc.xorps(a0, y0);
|
118
|
+
cc.movaps(x1, x0);
|
119
|
+
|
120
|
+
cc.psrlw(a0, 8);
|
121
|
+
cc.punpcklbw(x0, vzero);
|
122
|
+
|
123
|
+
cc.movaps(a1, a0);
|
124
|
+
cc.punpcklwd(a0, a0);
|
125
|
+
|
126
|
+
cc.punpckhbw(x1, vzero);
|
127
|
+
cc.punpckhwd(a1, a1);
|
128
|
+
|
129
|
+
cc.pshufd(a0, a0, x86::shuffleImm(3, 3, 1, 1));
|
130
|
+
cc.pshufd(a1, a1, x86::shuffleImm(3, 3, 1, 1));
|
131
|
+
|
132
|
+
cc.pmullw(x0, a0);
|
133
|
+
cc.pmullw(x1, a1);
|
134
|
+
|
135
|
+
cc.paddsw(x0, v0080);
|
136
|
+
cc.paddsw(x1, v0080);
|
137
|
+
|
138
|
+
cc.pmulhuw(x0, v0101);
|
139
|
+
cc.pmulhuw(x1, v0101);
|
140
|
+
|
141
|
+
cc.add(src, 16);
|
142
|
+
cc.packuswb(x0, x1);
|
143
|
+
|
144
|
+
cc.paddw(x0, y0);
|
145
|
+
cc.movaps(x86::ptr(dst), x0);
|
146
|
+
|
147
|
+
cc.add(dst, 16);
|
148
|
+
|
149
|
+
cc.dec(i);
|
150
|
+
cc.jnz(L_LargeLoop);
|
151
|
+
}
|
152
|
+
|
153
|
+
cc.bind(L_LargeEnd);
|
154
|
+
cc.test(j, j);
|
155
|
+
cc.jnz(L_SmallLoop);
|
156
|
+
|
157
|
+
cc.bind(L_Done);
|
158
|
+
}
|
159
|
+
|
160
|
+
static void generateSseAlphaBlend(asmjit::BaseEmitter& emitter, bool emitPrologEpilog) {
|
161
|
+
using namespace asmjit::x86;
|
162
|
+
|
163
|
+
if (emitter.isAssembler()) {
|
164
|
+
Assembler& cc = *emitter.as<Assembler>();
|
165
|
+
|
166
|
+
x86::Gp dst = cc.zax();
|
167
|
+
x86::Gp src = cc.zcx();
|
168
|
+
x86::Gp i = cc.zdx();
|
169
|
+
x86::Gp j = cc.zdi();
|
170
|
+
|
171
|
+
if (emitPrologEpilog) {
|
172
|
+
FuncDetail func;
|
173
|
+
func.init(FuncSignatureT<void, void*, const void*, size_t>(CallConvId::kHost), cc.environment());
|
174
|
+
|
175
|
+
FuncFrame frame;
|
176
|
+
frame.init(func);
|
177
|
+
frame.addDirtyRegs(dst, src, i, j);
|
178
|
+
frame.addDirtyRegs(xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7);
|
179
|
+
|
180
|
+
FuncArgsAssignment args(&func);
|
181
|
+
args.assignAll(dst, src, i);
|
182
|
+
args.updateFuncFrame(frame);
|
183
|
+
frame.finalize();
|
184
|
+
|
185
|
+
cc.emitProlog(frame);
|
186
|
+
cc.emitArgsAssignment(frame, args);
|
187
|
+
generateSseAlphaBlendInternal(cc, dst, src, i, j, xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7);
|
188
|
+
cc.emitEpilog(frame);
|
189
|
+
}
|
190
|
+
else {
|
191
|
+
generateSseAlphaBlendInternal(cc, dst, src, i, j, xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7);
|
192
|
+
}
|
193
|
+
}
|
194
|
+
#ifndef ASMJIT_NO_BUILDER
|
195
|
+
else if (emitter.isBuilder()) {
|
196
|
+
Builder& cc = *emitter.as<Builder>();
|
197
|
+
|
198
|
+
x86::Gp dst = cc.zax();
|
199
|
+
x86::Gp src = cc.zcx();
|
200
|
+
x86::Gp i = cc.zdx();
|
201
|
+
x86::Gp j = cc.zdi();
|
202
|
+
|
203
|
+
if (emitPrologEpilog) {
|
204
|
+
FuncDetail func;
|
205
|
+
func.init(FuncSignatureT<void, void*, const void*, size_t>(CallConvId::kHost), cc.environment());
|
206
|
+
|
207
|
+
FuncFrame frame;
|
208
|
+
frame.init(func);
|
209
|
+
frame.addDirtyRegs(dst, src, i, j);
|
210
|
+
frame.addDirtyRegs(xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7);
|
211
|
+
|
212
|
+
FuncArgsAssignment args(&func);
|
213
|
+
args.assignAll(dst, src, i);
|
214
|
+
args.updateFuncFrame(frame);
|
215
|
+
frame.finalize();
|
216
|
+
|
217
|
+
cc.emitProlog(frame);
|
218
|
+
cc.emitArgsAssignment(frame, args);
|
219
|
+
generateSseAlphaBlendInternal(cc, dst, src, i, j, xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7);
|
220
|
+
cc.emitEpilog(frame);
|
221
|
+
}
|
222
|
+
else {
|
223
|
+
generateSseAlphaBlendInternal(cc, dst, src, i, j, xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7);
|
224
|
+
}
|
225
|
+
}
|
226
|
+
#endif
|
227
|
+
#ifndef ASMJIT_NO_COMPILER
|
228
|
+
else if (emitter.isCompiler()) {
|
229
|
+
Compiler& cc = *emitter.as<Compiler>();
|
230
|
+
|
231
|
+
Gp dst = cc.newIntPtr("dst");
|
232
|
+
Gp src = cc.newIntPtr("src");
|
233
|
+
Gp i = cc.newIntPtr("i");
|
234
|
+
Gp j = cc.newIntPtr("j");
|
235
|
+
|
236
|
+
Xmm v0 = cc.newXmm("v0");
|
237
|
+
Xmm v1 = cc.newXmm("v1");
|
238
|
+
Xmm v2 = cc.newXmm("v2");
|
239
|
+
Xmm v3 = cc.newXmm("v3");
|
240
|
+
Xmm v4 = cc.newXmm("v4");
|
241
|
+
Xmm v5 = cc.newXmm("v5");
|
242
|
+
Xmm v6 = cc.newXmm("v6");
|
243
|
+
Xmm v7 = cc.newXmm("v7");
|
244
|
+
|
245
|
+
FuncNode* funcNode = cc.addFunc(FuncSignatureT<void, void*, const void*, size_t>(CallConvId::kHost));
|
246
|
+
funcNode->setArg(0, dst);
|
247
|
+
funcNode->setArg(1, src);
|
248
|
+
funcNode->setArg(2, i);
|
249
|
+
generateSseAlphaBlendInternal(cc, dst, src, i, j, v0, v1, v2, v3, v4, v5, v6, v7);
|
250
|
+
cc.endFunc();
|
251
|
+
}
|
252
|
+
#endif
|
253
|
+
}
|
254
|
+
|
255
|
+
} // {asmtest}
|
256
|
+
|
257
|
+
#endif // ASMJIT_TEST_MISC_H_INCLUDED
|
@@ -0,0 +1,62 @@
|
|
1
|
+
// This file is part of AsmJit project <https://asmjit.com>
|
2
|
+
//
|
3
|
+
// See asmjit.h or LICENSE.md for license and copyright information
|
4
|
+
// SPDX-License-Identifier: Zlib
|
5
|
+
|
6
|
+
#include <asmjit/core.h>
|
7
|
+
#include <stdio.h>
|
8
|
+
#include <stdlib.h>
|
9
|
+
#include <string.h>
|
10
|
+
|
11
|
+
#include "cmdline.h"
|
12
|
+
|
13
|
+
using namespace asmjit;
|
14
|
+
|
15
|
+
#if !defined(ASMJIT_NO_X86)
|
16
|
+
// Declaration of the X86/X64 emitter benchmark; main() calls it when --arch
// selects "all", "x86" or "x64". Only declared when ASMJIT_NO_X86 is not defined.
void benchmarkX86Emitters(uint32_t numIterations, bool testX86, bool testX64) noexcept;
|
17
|
+
#endif
|
18
|
+
|
19
|
+
#if !defined(ASMJIT_NO_AARCH64)
|
20
|
+
// Declaration of the AArch64 emitter benchmark; main() calls it when --arch
// selects "all" or "aarch64". Only declared when ASMJIT_NO_AARCH64 is not defined.
void benchmarkA64Emitters(uint32_t numIterations);
|
21
|
+
#endif
|
22
|
+
|
23
|
+
int main(int argc, char* argv[]) {
|
24
|
+
CmdLine cmdLine(argc, argv);
|
25
|
+
uint32_t numIterations = 20000;
|
26
|
+
|
27
|
+
printf("AsmJit Performance Suite v%u.%u.%u:\n\n",
|
28
|
+
unsigned((ASMJIT_LIBRARY_VERSION >> 16) ),
|
29
|
+
unsigned((ASMJIT_LIBRARY_VERSION >> 8) & 0xFF),
|
30
|
+
unsigned((ASMJIT_LIBRARY_VERSION ) & 0xFF));
|
31
|
+
|
32
|
+
printf("Usage:\n");
|
33
|
+
printf(" --help Show usage only\n");
|
34
|
+
printf(" --quick Decrease the number of iterations to make tests quicker\n");
|
35
|
+
printf(" --arch=<ARCH> Select architecture to run ('all' by default)\n");
|
36
|
+
printf("\n");
|
37
|
+
|
38
|
+
if (cmdLine.hasArg("--help"))
|
39
|
+
return 0;
|
40
|
+
|
41
|
+
if (cmdLine.hasArg("--quick"))
|
42
|
+
numIterations = 1000;
|
43
|
+
|
44
|
+
const char* arch = cmdLine.valueOf("--arch", "all");
|
45
|
+
|
46
|
+
#if !defined(ASMJIT_NO_X86)
|
47
|
+
bool testX86 = strcmp(arch, "all") == 0 || strcmp(arch, "x86") == 0;
|
48
|
+
bool testX64 = strcmp(arch, "all") == 0 || strcmp(arch, "x64") == 0;
|
49
|
+
|
50
|
+
if (testX86 || testX64)
|
51
|
+
benchmarkX86Emitters(numIterations, testX86, testX64);
|
52
|
+
#endif
|
53
|
+
|
54
|
+
#if !defined(ASMJIT_NO_AARCH64)
|
55
|
+
bool testAArch64 = strcmp(arch, "all") == 0 || strcmp(arch, "aarch64") == 0;
|
56
|
+
|
57
|
+
if (testAArch64)
|
58
|
+
benchmarkA64Emitters(numIterations);
|
59
|
+
#endif
|
60
|
+
|
61
|
+
return 0;
|
62
|
+
}
|