asmjit 0.2.0 → 0.2.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Gemfile.lock +1 -1
- data/asmjit.gemspec +1 -1
- data/ext/asmjit/asmjit/.editorconfig +10 -0
- data/ext/asmjit/asmjit/.github/FUNDING.yml +1 -0
- data/ext/asmjit/asmjit/.github/workflows/build-config.json +47 -0
- data/ext/asmjit/asmjit/.github/workflows/build.yml +156 -0
- data/ext/asmjit/asmjit/.gitignore +6 -0
- data/ext/asmjit/asmjit/CMakeLists.txt +611 -0
- data/ext/asmjit/asmjit/LICENSE.md +17 -0
- data/ext/asmjit/asmjit/README.md +69 -0
- data/ext/asmjit/asmjit/src/asmjit/a64.h +62 -0
- data/ext/asmjit/asmjit/src/asmjit/arm/a64archtraits_p.h +81 -0
- data/ext/asmjit/asmjit/src/asmjit/arm/a64assembler.cpp +5115 -0
- data/ext/asmjit/asmjit/src/asmjit/arm/a64assembler.h +72 -0
- data/ext/asmjit/asmjit/src/asmjit/arm/a64builder.cpp +51 -0
- data/ext/asmjit/asmjit/src/asmjit/arm/a64builder.h +57 -0
- data/ext/asmjit/asmjit/src/asmjit/arm/a64compiler.cpp +60 -0
- data/ext/asmjit/asmjit/src/asmjit/arm/a64compiler.h +247 -0
- data/ext/asmjit/asmjit/src/asmjit/arm/a64emithelper.cpp +464 -0
- data/ext/asmjit/asmjit/src/asmjit/arm/a64emithelper_p.h +50 -0
- data/ext/asmjit/asmjit/src/asmjit/arm/a64emitter.h +1228 -0
- data/ext/asmjit/asmjit/src/asmjit/arm/a64formatter.cpp +298 -0
- data/ext/asmjit/asmjit/src/asmjit/arm/a64formatter_p.h +59 -0
- data/ext/asmjit/asmjit/src/asmjit/arm/a64func.cpp +189 -0
- data/ext/asmjit/asmjit/src/asmjit/arm/a64func_p.h +33 -0
- data/ext/asmjit/asmjit/src/asmjit/arm/a64globals.h +1894 -0
- data/ext/asmjit/asmjit/src/asmjit/arm/a64instapi.cpp +278 -0
- data/ext/asmjit/asmjit/src/asmjit/arm/a64instapi_p.h +41 -0
- data/ext/asmjit/asmjit/src/asmjit/arm/a64instdb.cpp +1957 -0
- data/ext/asmjit/asmjit/src/asmjit/arm/a64instdb.h +74 -0
- data/ext/asmjit/asmjit/src/asmjit/arm/a64instdb_p.h +876 -0
- data/ext/asmjit/asmjit/src/asmjit/arm/a64operand.cpp +85 -0
- data/ext/asmjit/asmjit/src/asmjit/arm/a64operand.h +312 -0
- data/ext/asmjit/asmjit/src/asmjit/arm/a64rapass.cpp +852 -0
- data/ext/asmjit/asmjit/src/asmjit/arm/a64rapass_p.h +105 -0
- data/ext/asmjit/asmjit/src/asmjit/arm/a64utils.h +179 -0
- data/ext/asmjit/asmjit/src/asmjit/arm/armformatter.cpp +143 -0
- data/ext/asmjit/asmjit/src/asmjit/arm/armformatter_p.h +44 -0
- data/ext/asmjit/asmjit/src/asmjit/arm/armglobals.h +21 -0
- data/ext/asmjit/asmjit/src/asmjit/arm/armoperand.h +621 -0
- data/ext/asmjit/asmjit/src/asmjit/arm.h +62 -0
- data/ext/asmjit/asmjit/src/asmjit/asmjit-scope-begin.h +17 -0
- data/ext/asmjit/asmjit/src/asmjit/asmjit-scope-end.h +9 -0
- data/ext/asmjit/asmjit/src/asmjit/asmjit.h +33 -0
- data/ext/asmjit/asmjit/src/asmjit/core/api-build_p.h +55 -0
- data/ext/asmjit/asmjit/src/asmjit/core/api-config.h +613 -0
- data/ext/asmjit/asmjit/src/asmjit/core/archcommons.h +229 -0
- data/ext/asmjit/asmjit/src/asmjit/core/archtraits.cpp +160 -0
- data/ext/asmjit/asmjit/src/asmjit/core/archtraits.h +290 -0
- data/ext/asmjit/asmjit/src/asmjit/core/assembler.cpp +406 -0
- data/ext/asmjit/asmjit/src/asmjit/core/assembler.h +129 -0
- data/ext/asmjit/asmjit/src/asmjit/core/builder.cpp +889 -0
- data/ext/asmjit/asmjit/src/asmjit/core/builder.h +1391 -0
- data/ext/asmjit/asmjit/src/asmjit/core/codebuffer.h +113 -0
- data/ext/asmjit/asmjit/src/asmjit/core/codeholder.cpp +1149 -0
- data/ext/asmjit/asmjit/src/asmjit/core/codeholder.h +1035 -0
- data/ext/asmjit/asmjit/src/asmjit/core/codewriter.cpp +175 -0
- data/ext/asmjit/asmjit/src/asmjit/core/codewriter_p.h +179 -0
- data/ext/asmjit/asmjit/src/asmjit/core/compiler.cpp +582 -0
- data/ext/asmjit/asmjit/src/asmjit/core/compiler.h +737 -0
- data/ext/asmjit/asmjit/src/asmjit/core/compilerdefs.h +173 -0
- data/ext/asmjit/asmjit/src/asmjit/core/constpool.cpp +363 -0
- data/ext/asmjit/asmjit/src/asmjit/core/constpool.h +250 -0
- data/ext/asmjit/asmjit/src/asmjit/core/cpuinfo.cpp +1162 -0
- data/ext/asmjit/asmjit/src/asmjit/core/cpuinfo.h +813 -0
- data/ext/asmjit/asmjit/src/asmjit/core/emithelper.cpp +323 -0
- data/ext/asmjit/asmjit/src/asmjit/core/emithelper_p.h +58 -0
- data/ext/asmjit/asmjit/src/asmjit/core/emitter.cpp +333 -0
- data/ext/asmjit/asmjit/src/asmjit/core/emitter.h +741 -0
- data/ext/asmjit/asmjit/src/asmjit/core/emitterutils.cpp +129 -0
- data/ext/asmjit/asmjit/src/asmjit/core/emitterutils_p.h +89 -0
- data/ext/asmjit/asmjit/src/asmjit/core/environment.cpp +46 -0
- data/ext/asmjit/asmjit/src/asmjit/core/environment.h +508 -0
- data/ext/asmjit/asmjit/src/asmjit/core/errorhandler.cpp +14 -0
- data/ext/asmjit/asmjit/src/asmjit/core/errorhandler.h +228 -0
- data/ext/asmjit/asmjit/src/asmjit/core/formatter.cpp +584 -0
- data/ext/asmjit/asmjit/src/asmjit/core/formatter.h +247 -0
- data/ext/asmjit/asmjit/src/asmjit/core/formatter_p.h +34 -0
- data/ext/asmjit/asmjit/src/asmjit/core/func.cpp +286 -0
- data/ext/asmjit/asmjit/src/asmjit/core/func.h +1445 -0
- data/ext/asmjit/asmjit/src/asmjit/core/funcargscontext.cpp +293 -0
- data/ext/asmjit/asmjit/src/asmjit/core/funcargscontext_p.h +199 -0
- data/ext/asmjit/asmjit/src/asmjit/core/globals.cpp +133 -0
- data/ext/asmjit/asmjit/src/asmjit/core/globals.h +393 -0
- data/ext/asmjit/asmjit/src/asmjit/core/inst.cpp +113 -0
- data/ext/asmjit/asmjit/src/asmjit/core/inst.h +772 -0
- data/ext/asmjit/asmjit/src/asmjit/core/jitallocator.cpp +1242 -0
- data/ext/asmjit/asmjit/src/asmjit/core/jitallocator.h +261 -0
- data/ext/asmjit/asmjit/src/asmjit/core/jitruntime.cpp +80 -0
- data/ext/asmjit/asmjit/src/asmjit/core/jitruntime.h +89 -0
- data/ext/asmjit/asmjit/src/asmjit/core/logger.cpp +69 -0
- data/ext/asmjit/asmjit/src/asmjit/core/logger.h +198 -0
- data/ext/asmjit/asmjit/src/asmjit/core/misc_p.h +33 -0
- data/ext/asmjit/asmjit/src/asmjit/core/operand.cpp +132 -0
- data/ext/asmjit/asmjit/src/asmjit/core/operand.h +1611 -0
- data/ext/asmjit/asmjit/src/asmjit/core/osutils.cpp +84 -0
- data/ext/asmjit/asmjit/src/asmjit/core/osutils.h +61 -0
- data/ext/asmjit/asmjit/src/asmjit/core/osutils_p.h +68 -0
- data/ext/asmjit/asmjit/src/asmjit/core/raassignment_p.h +418 -0
- data/ext/asmjit/asmjit/src/asmjit/core/rabuilders_p.h +612 -0
- data/ext/asmjit/asmjit/src/asmjit/core/radefs_p.h +1204 -0
- data/ext/asmjit/asmjit/src/asmjit/core/ralocal.cpp +1166 -0
- data/ext/asmjit/asmjit/src/asmjit/core/ralocal_p.h +254 -0
- data/ext/asmjit/asmjit/src/asmjit/core/rapass.cpp +1969 -0
- data/ext/asmjit/asmjit/src/asmjit/core/rapass_p.h +1183 -0
- data/ext/asmjit/asmjit/src/asmjit/core/rastack.cpp +184 -0
- data/ext/asmjit/asmjit/src/asmjit/core/rastack_p.h +171 -0
- data/ext/asmjit/asmjit/src/asmjit/core/string.cpp +559 -0
- data/ext/asmjit/asmjit/src/asmjit/core/string.h +372 -0
- data/ext/asmjit/asmjit/src/asmjit/core/support.cpp +494 -0
- data/ext/asmjit/asmjit/src/asmjit/core/support.h +1773 -0
- data/ext/asmjit/asmjit/src/asmjit/core/target.cpp +14 -0
- data/ext/asmjit/asmjit/src/asmjit/core/target.h +53 -0
- data/ext/asmjit/asmjit/src/asmjit/core/type.cpp +74 -0
- data/ext/asmjit/asmjit/src/asmjit/core/type.h +419 -0
- data/ext/asmjit/asmjit/src/asmjit/core/virtmem.cpp +722 -0
- data/ext/asmjit/asmjit/src/asmjit/core/virtmem.h +242 -0
- data/ext/asmjit/asmjit/src/asmjit/core/zone.cpp +353 -0
- data/ext/asmjit/asmjit/src/asmjit/core/zone.h +615 -0
- data/ext/asmjit/asmjit/src/asmjit/core/zonehash.cpp +309 -0
- data/ext/asmjit/asmjit/src/asmjit/core/zonehash.h +186 -0
- data/ext/asmjit/asmjit/src/asmjit/core/zonelist.cpp +163 -0
- data/ext/asmjit/asmjit/src/asmjit/core/zonelist.h +209 -0
- data/ext/asmjit/asmjit/src/asmjit/core/zonestack.cpp +176 -0
- data/ext/asmjit/asmjit/src/asmjit/core/zonestack.h +239 -0
- data/ext/asmjit/asmjit/src/asmjit/core/zonestring.h +120 -0
- data/ext/asmjit/asmjit/src/asmjit/core/zonetree.cpp +99 -0
- data/ext/asmjit/asmjit/src/asmjit/core/zonetree.h +380 -0
- data/ext/asmjit/asmjit/src/asmjit/core/zonevector.cpp +356 -0
- data/ext/asmjit/asmjit/src/asmjit/core/zonevector.h +690 -0
- data/ext/asmjit/asmjit/src/asmjit/core.h +1861 -0
- data/ext/asmjit/asmjit/src/asmjit/x86/x86archtraits_p.h +148 -0
- data/ext/asmjit/asmjit/src/asmjit/x86/x86assembler.cpp +5110 -0
- data/ext/asmjit/asmjit/src/asmjit/x86/x86assembler.h +685 -0
- data/ext/asmjit/asmjit/src/asmjit/x86/x86builder.cpp +52 -0
- data/ext/asmjit/asmjit/src/asmjit/x86/x86builder.h +351 -0
- data/ext/asmjit/asmjit/src/asmjit/x86/x86compiler.cpp +61 -0
- data/ext/asmjit/asmjit/src/asmjit/x86/x86compiler.h +721 -0
- data/ext/asmjit/asmjit/src/asmjit/x86/x86emithelper.cpp +619 -0
- data/ext/asmjit/asmjit/src/asmjit/x86/x86emithelper_p.h +60 -0
- data/ext/asmjit/asmjit/src/asmjit/x86/x86emitter.h +4315 -0
- data/ext/asmjit/asmjit/src/asmjit/x86/x86formatter.cpp +944 -0
- data/ext/asmjit/asmjit/src/asmjit/x86/x86formatter_p.h +58 -0
- data/ext/asmjit/asmjit/src/asmjit/x86/x86func.cpp +503 -0
- data/ext/asmjit/asmjit/src/asmjit/x86/x86func_p.h +33 -0
- data/ext/asmjit/asmjit/src/asmjit/x86/x86globals.h +2169 -0
- data/ext/asmjit/asmjit/src/asmjit/x86/x86instapi.cpp +1732 -0
- data/ext/asmjit/asmjit/src/asmjit/x86/x86instapi_p.h +41 -0
- data/ext/asmjit/asmjit/src/asmjit/x86/x86instdb.cpp +4427 -0
- data/ext/asmjit/asmjit/src/asmjit/x86/x86instdb.h +563 -0
- data/ext/asmjit/asmjit/src/asmjit/x86/x86instdb_p.h +311 -0
- data/ext/asmjit/asmjit/src/asmjit/x86/x86opcode_p.h +436 -0
- data/ext/asmjit/asmjit/src/asmjit/x86/x86operand.cpp +231 -0
- data/ext/asmjit/asmjit/src/asmjit/x86/x86operand.h +1085 -0
- data/ext/asmjit/asmjit/src/asmjit/x86/x86rapass.cpp +1509 -0
- data/ext/asmjit/asmjit/src/asmjit/x86/x86rapass_p.h +94 -0
- data/ext/asmjit/asmjit/src/asmjit/x86.h +93 -0
- data/ext/asmjit/asmjit/src/asmjit.natvis +245 -0
- data/ext/asmjit/asmjit/test/asmjit_test_assembler.cpp +84 -0
- data/ext/asmjit/asmjit/test/asmjit_test_assembler.h +85 -0
- data/ext/asmjit/asmjit/test/asmjit_test_assembler_a64.cpp +4006 -0
- data/ext/asmjit/asmjit/test/asmjit_test_assembler_x64.cpp +17833 -0
- data/ext/asmjit/asmjit/test/asmjit_test_assembler_x86.cpp +8300 -0
- data/ext/asmjit/asmjit/test/asmjit_test_compiler.cpp +253 -0
- data/ext/asmjit/asmjit/test/asmjit_test_compiler.h +73 -0
- data/ext/asmjit/asmjit/test/asmjit_test_compiler_a64.cpp +690 -0
- data/ext/asmjit/asmjit/test/asmjit_test_compiler_x86.cpp +4317 -0
- data/ext/asmjit/asmjit/test/asmjit_test_emitters.cpp +197 -0
- data/ext/asmjit/asmjit/test/asmjit_test_instinfo.cpp +181 -0
- data/ext/asmjit/asmjit/test/asmjit_test_misc.h +257 -0
- data/ext/asmjit/asmjit/test/asmjit_test_perf.cpp +62 -0
- data/ext/asmjit/asmjit/test/asmjit_test_perf.h +61 -0
- data/ext/asmjit/asmjit/test/asmjit_test_perf_a64.cpp +699 -0
- data/ext/asmjit/asmjit/test/asmjit_test_perf_x86.cpp +5032 -0
- data/ext/asmjit/asmjit/test/asmjit_test_unit.cpp +172 -0
- data/ext/asmjit/asmjit/test/asmjit_test_x86_sections.cpp +172 -0
- data/ext/asmjit/asmjit/test/asmjitutils.h +38 -0
- data/ext/asmjit/asmjit/test/broken.cpp +312 -0
- data/ext/asmjit/asmjit/test/broken.h +148 -0
- data/ext/asmjit/asmjit/test/cmdline.h +61 -0
- data/ext/asmjit/asmjit/test/performancetimer.h +41 -0
- data/ext/asmjit/asmjit/tools/configure-makefiles.sh +13 -0
- data/ext/asmjit/asmjit/tools/configure-ninja.sh +13 -0
- data/ext/asmjit/asmjit/tools/configure-sanitizers.sh +13 -0
- data/ext/asmjit/asmjit/tools/configure-vs2019-x64.bat +2 -0
- data/ext/asmjit/asmjit/tools/configure-vs2019-x86.bat +2 -0
- data/ext/asmjit/asmjit/tools/configure-vs2022-x64.bat +2 -0
- data/ext/asmjit/asmjit/tools/configure-vs2022-x86.bat +2 -0
- data/ext/asmjit/asmjit/tools/configure-xcode.sh +8 -0
- data/ext/asmjit/asmjit/tools/enumgen.js +417 -0
- data/ext/asmjit/asmjit/tools/enumgen.sh +3 -0
- data/ext/asmjit/asmjit/tools/tablegen-arm.js +365 -0
- data/ext/asmjit/asmjit/tools/tablegen-arm.sh +3 -0
- data/ext/asmjit/asmjit/tools/tablegen-x86.js +2638 -0
- data/ext/asmjit/asmjit/tools/tablegen-x86.sh +3 -0
- data/ext/asmjit/asmjit/tools/tablegen.js +947 -0
- data/ext/asmjit/asmjit/tools/tablegen.sh +4 -0
- data/ext/asmjit/asmjit.cc +18 -0
- data/lib/asmjit/version.rb +1 -1
- metadata +197 -2
@@ -0,0 +1,1162 @@
|
|
1
|
+
// This file is part of AsmJit project <https://asmjit.com>
|
2
|
+
//
|
3
|
+
// See asmjit.h or LICENSE.md for license and copyright information
|
4
|
+
// SPDX-License-Identifier: Zlib
|
5
|
+
|
6
|
+
#include "../core/api-build_p.h"
|
7
|
+
#include "../core/cpuinfo.h"
|
8
|
+
#include "../core/support.h"
|
9
|
+
|
10
|
+
#if !defined(_WIN32)
|
11
|
+
#include <errno.h>
|
12
|
+
#include <sys/utsname.h>
|
13
|
+
#include <unistd.h>
|
14
|
+
#endif
|
15
|
+
|
16
|
+
// Required by `getauxval()` on Linux.
|
17
|
+
#if defined(__linux__)
|
18
|
+
#include <sys/auxv.h>
|
19
|
+
#endif
|
20
|
+
|
21
|
+
//! Required to detect CPU and features on Apple platforms.
|
22
|
+
#if defined(__APPLE__)
|
23
|
+
#include <mach/machine.h>
|
24
|
+
#include <sys/types.h>
|
25
|
+
#include <sys/sysctl.h>
|
26
|
+
#endif
|
27
|
+
|
28
|
+
// Required by `__cpuidex()` and `_xgetbv()`.
|
29
|
+
#if defined(_MSC_VER)
|
30
|
+
#include <intrin.h>
|
31
|
+
#endif
|
32
|
+
|
33
|
+
ASMJIT_BEGIN_NAMESPACE
|
34
|
+
|
35
|
+
// CpuInfo - Detect - HW-Thread Count
|
36
|
+
// ==================================
|
37
|
+
|
38
|
+
// Returns the number of hardware threads reported by the operating system.
//
// - Windows: `dwNumberOfProcessors` as filled by `GetSystemInfo()`.
// - POSIX (when `_SC_NPROCESSORS_ONLN` is available): number of online processors, or 1 if `sysconf()`
//   reports zero or fails.
// - Anything else: always 1.
static inline uint32_t detectHWThreadCount() noexcept {
#if defined(_WIN32)
  SYSTEM_INFO sysInfo;
  ::GetSystemInfo(&sysInfo);
  return sysInfo.dwNumberOfProcessors;
#elif defined(_SC_NPROCESSORS_ONLN)
  const long onlineCount = ::sysconf(_SC_NPROCESSORS_ONLN);
  if (onlineCount > 0)
    return uint32_t(onlineCount);

  // Non-positive result means failure (or nonsense) - fall back to a single thread.
  return uint32_t(1);
#else
  return 1;
#endif
}
|
54
|
+
|
55
|
+
// CpuInfo - Detect - X86
|
56
|
+
// ======================
|
57
|
+
|
58
|
+
#if ASMJIT_ARCH_X86
|
59
|
+
|
60
|
+
// Registers written by the `cpuid` instruction.
//
// The member order (EAX, EBX, ECX, EDX) must not change - the MSVC path of `cpuidQuery()` passes this struct
// to `__cpuidex()` reinterpreted as an array of four ints.
struct cpuid_t {
  uint32_t eax;
  uint32_t ebx;
  uint32_t ecx;
  uint32_t edx;
};

// Registers written by the `xgetbv` instruction (low and high halves of the 64-bit result).
struct xgetbv_t {
  uint32_t eax;
  uint32_t edx;
};
|
62
|
+
|
63
|
+
// Executes `cpuid` instruction.
|
64
|
+
static inline void cpuidQuery(cpuid_t* out, uint32_t inEax, uint32_t inEcx = 0) noexcept {
|
65
|
+
#if defined(_MSC_VER)
|
66
|
+
__cpuidex(reinterpret_cast<int*>(out), inEax, inEcx);
|
67
|
+
#elif defined(__GNUC__) && ASMJIT_ARCH_X86 == 32
|
68
|
+
__asm__ __volatile__(
|
69
|
+
"mov %%ebx, %%edi\n"
|
70
|
+
"cpuid\n"
|
71
|
+
"xchg %%edi, %%ebx\n" : "=a"(out->eax), "=D"(out->ebx), "=c"(out->ecx), "=d"(out->edx) : "a"(inEax), "c"(inEcx));
|
72
|
+
#elif defined(__GNUC__) && ASMJIT_ARCH_X86 == 64
|
73
|
+
__asm__ __volatile__(
|
74
|
+
"mov %%rbx, %%rdi\n"
|
75
|
+
"cpuid\n"
|
76
|
+
"xchg %%rdi, %%rbx\n" : "=a"(out->eax), "=D"(out->ebx), "=c"(out->ecx), "=d"(out->edx) : "a"(inEax), "c"(inEcx));
|
77
|
+
#else
|
78
|
+
#error "[asmjit] x86::cpuidQuery() - Unsupported compiler."
|
79
|
+
#endif
|
80
|
+
}
|
81
|
+
|
82
|
+
// Executes 'xgetbv' instruction.
|
83
|
+
static inline void xgetbvQuery(xgetbv_t* out, uint32_t inEcx) noexcept {
|
84
|
+
#if defined(_MSC_VER)
|
85
|
+
uint64_t value = _xgetbv(inEcx);
|
86
|
+
out->eax = uint32_t(value & 0xFFFFFFFFu);
|
87
|
+
out->edx = uint32_t(value >> 32);
|
88
|
+
#elif defined(__GNUC__)
|
89
|
+
uint32_t outEax;
|
90
|
+
uint32_t outEdx;
|
91
|
+
|
92
|
+
// Replaced, because the world is not perfect:
|
93
|
+
// __asm__ __volatile__("xgetbv" : "=a"(outEax), "=d"(outEdx) : "c"(inEcx));
|
94
|
+
__asm__ __volatile__(".byte 0x0F, 0x01, 0xD0" : "=a"(outEax), "=d"(outEdx) : "c"(inEcx));
|
95
|
+
|
96
|
+
out->eax = outEax;
|
97
|
+
out->edx = outEdx;
|
98
|
+
#else
|
99
|
+
out->eax = 0;
|
100
|
+
out->edx = 0;
|
101
|
+
#endif
|
102
|
+
}
|
103
|
+
|
104
|
+
// Map a 12-byte vendor string returned by `cpuid` into a `CpuInfo::Vendor` ID.
|
105
|
+
static inline void simplifyCpuVendor(CpuInfo& cpu, uint32_t d0, uint32_t d1, uint32_t d2) noexcept {
|
106
|
+
struct Vendor {
|
107
|
+
char normalized[8];
|
108
|
+
union { char text[12]; uint32_t d[3]; };
|
109
|
+
};
|
110
|
+
|
111
|
+
static const Vendor table[] = {
|
112
|
+
{ { 'A', 'M', 'D' }, {{ 'A', 'u', 't', 'h', 'e', 'n', 't', 'i', 'c', 'A', 'M', 'D' }} },
|
113
|
+
{ { 'I', 'N', 'T', 'E', 'L' }, {{ 'G', 'e', 'n', 'u', 'i', 'n', 'e', 'I', 'n', 't', 'e', 'l' }} },
|
114
|
+
{ { 'V', 'I', 'A' }, {{ 'C', 'e', 'n', 't', 'a', 'u', 'r', 'H', 'a', 'u', 'l', 's' }} },
|
115
|
+
{ { 'V', 'I', 'A' }, {{ 'V', 'I', 'A', 0 , 'V', 'I', 'A', 0 , 'V', 'I', 'A', 0 }} },
|
116
|
+
{ { 'U', 'N', 'K', 'N', 'O', 'W', 'N' }, {{ 0 }} }
|
117
|
+
};
|
118
|
+
|
119
|
+
uint32_t i;
|
120
|
+
for (i = 0; i < ASMJIT_ARRAY_SIZE(table) - 1; i++)
|
121
|
+
if (table[i].d[0] == d0 && table[i].d[1] == d1 && table[i].d[2] == d2)
|
122
|
+
break;
|
123
|
+
memcpy(cpu._vendor.str, table[i].normalized, 8);
|
124
|
+
}
|
125
|
+
|
126
|
+
// Compacts a null-terminated CPU brand string in place.
//
// A space is dropped when it directly follows a kept '@', or when the next character is another space or '@'.
// All other characters are kept. Every byte that was read is zeroed before being (possibly) overwritten with
// compacted output, so nothing of the original text remains after the new terminator.
static ASMJIT_FAVOR_SIZE void simplifyCpuBrand(char* s) noexcept {
  char* dst = s;

  char cur = *s;
  char lastKept = '\0';

  // Clear the byte that was just read; `dst` never runs ahead of `s`, so this cannot destroy pending input.
  *s = '\0';

  while (cur != '\0') {
    // Still intact - only bytes at or before `s` have been cleared or rewritten so far.
    const char next = s[1];
    const bool redundantSpace = (cur == ' ') && (lastKept == '@' || next == ' ' || next == '@');

    if (!redundantSpace) {
      *dst++ = cur;
      lastKept = cur;
    }

    ++s;
    cur = *s;
    *s = '\0';
  }

  *dst = '\0';
}
|
151
|
+
|
152
|
+
// Detects the host X86/X86_64 CPU by executing CPUID (and XGETBV when XSAVE/OSXSAVE are reported) and fills
// `cpu` with the vendor, brand string, family/model/stepping, cache line size, and the supported features.
//
// Features that depend on state managed by the OS (AVX, AVX-512, AMX, MPX) are only reported when the
// required XCR0 bits are set - the exact bits are described at each check below.
static ASMJIT_FAVOR_SIZE void detectX86Cpu(CpuInfo& cpu) noexcept {
  using Support::bitTest;

  cpuid_t regs;
  xgetbv_t xcr0 { 0, 0 };
  CpuFeatures::X86& features = cpu.features().x86();

  cpu._wasDetected = true;
  cpu._maxLogicalProcessors = 1;

  // We are gonna execute CPUID, which was introduced by I486, so it's the requirement.
  features.add(CpuFeatures::X86::kI486);

  // CPUID EAX=0
  // -----------

  // Get vendor string/id.
  cpuidQuery(&regs, 0x0);

  uint32_t maxId = regs.eax;
  uint32_t maxSubLeafId_0x7 = 0;

  // The vendor signature is stored in EBX, EDX, ECX (in this order).
  simplifyCpuVendor(cpu, regs.ebx, regs.edx, regs.ecx);

  // CPUID EAX=1
  // -----------

  if (maxId >= 0x1) {
    // Get feature flags in ECX/EDX and family/model in EAX.
    cpuidQuery(&regs, 0x1);

    // Fill family and model fields.
    uint32_t modelId  = (regs.eax >> 4) & 0x0F;
    uint32_t familyId = (regs.eax >> 8) & 0x0F;

    // Use extended family and model fields.
    if (familyId == 0x06u || familyId == 0x0Fu)
      modelId += (((regs.eax >> 16) & 0x0Fu) << 4);

    if (familyId == 0x0Fu)
      familyId += ((regs.eax >> 20) & 0xFFu);

    cpu._modelId              = modelId;
    cpu._familyId             = familyId;
    cpu._brandId              = ((regs.ebx      ) & 0xFF);
    cpu._processorType        = ((regs.eax >> 12) & 0x03);
    cpu._maxLogicalProcessors = ((regs.ebx >> 16) & 0xFF);
    cpu._stepping             = ((regs.eax      ) & 0x0F);
    cpu._cacheLineSize        = ((regs.ebx >>  8) & 0xFF) * 8;

    features.addIf(bitTest(regs.ecx,  0), CpuFeatures::X86::kSSE3);
    features.addIf(bitTest(regs.ecx,  1), CpuFeatures::X86::kPCLMULQDQ);
    features.addIf(bitTest(regs.ecx,  3), CpuFeatures::X86::kMONITOR);
    features.addIf(bitTest(regs.ecx,  5), CpuFeatures::X86::kVMX);
    features.addIf(bitTest(regs.ecx,  6), CpuFeatures::X86::kSMX);
    features.addIf(bitTest(regs.ecx,  9), CpuFeatures::X86::kSSSE3);
    features.addIf(bitTest(regs.ecx, 13), CpuFeatures::X86::kCMPXCHG16B);
    features.addIf(bitTest(regs.ecx, 19), CpuFeatures::X86::kSSE4_1);
    features.addIf(bitTest(regs.ecx, 20), CpuFeatures::X86::kSSE4_2);
    features.addIf(bitTest(regs.ecx, 22), CpuFeatures::X86::kMOVBE);
    features.addIf(bitTest(regs.ecx, 23), CpuFeatures::X86::kPOPCNT);
    features.addIf(bitTest(regs.ecx, 25), CpuFeatures::X86::kAESNI);
    features.addIf(bitTest(regs.ecx, 26), CpuFeatures::X86::kXSAVE);
    features.addIf(bitTest(regs.ecx, 27), CpuFeatures::X86::kOSXSAVE);
    features.addIf(bitTest(regs.ecx, 30), CpuFeatures::X86::kRDRAND);
    features.addIf(bitTest(regs.edx,  0), CpuFeatures::X86::kFPU);
    features.addIf(bitTest(regs.edx,  4), CpuFeatures::X86::kRDTSC);
    features.addIf(bitTest(regs.edx,  5), CpuFeatures::X86::kMSR);
    features.addIf(bitTest(regs.edx,  8), CpuFeatures::X86::kCMPXCHG8B);
    features.addIf(bitTest(regs.edx, 15), CpuFeatures::X86::kCMOV);
    features.addIf(bitTest(regs.edx, 19), CpuFeatures::X86::kCLFLUSH);
    features.addIf(bitTest(regs.edx, 23), CpuFeatures::X86::kMMX);
    features.addIf(bitTest(regs.edx, 24), CpuFeatures::X86::kFXSR);
    features.addIf(bitTest(regs.edx, 25), CpuFeatures::X86::kSSE);
    // SSE2 is reported by EDX bit 26 (bit 25 is SSE); SSE2 implies SSE.
    features.addIf(bitTest(regs.edx, 26), CpuFeatures::X86::kSSE, CpuFeatures::X86::kSSE2);
    features.addIf(bitTest(regs.edx, 28), CpuFeatures::X86::kMT);

    // Get the content of XCR0 if supported by the CPU and enabled by the OS.
    if (features.hasXSAVE() && features.hasOSXSAVE()) {
      xgetbvQuery(&xcr0, 0);
    }

    // Detect AVX+.
    if (bitTest(regs.ecx, 28)) {
      // - XCR0[2:1] == 11b
      //   XMM & YMM states need to be enabled by OS.
      if ((xcr0.eax & 0x00000006u) == 0x00000006u) {
        features.add(CpuFeatures::X86::kAVX);
        features.addIf(bitTest(regs.ecx, 12), CpuFeatures::X86::kFMA);
        features.addIf(bitTest(regs.ecx, 29), CpuFeatures::X86::kF16C);
      }
    }
  }

  // - XCR0[18:17] == 11b - Both AMX related states need to be enabled by OS.
  constexpr uint32_t kXCR0_AMX_Bits = 0x3u << 17;
  bool amxEnabledByOS = (xcr0.eax & kXCR0_AMX_Bits) == kXCR0_AMX_Bits;

#if defined(__APPLE__)
  // Apple platform provides on-demand AVX512 support. When an AVX512 instruction is used the first time it results
  // in #UD, which would cause the thread being promoted to use AVX512 support by the OS in addition to enabling the
  // necessary bits in XCR0 register.
  bool avx512EnabledByOS = true;
#else
  // - XCR0[2:1] == 11b  - XMM/YMM states need to be enabled by OS.
  // - XCR0[7:5] == 111b - Opmask, upper 256-bit of ZMM0-ZMM15 and ZMM16-ZMM31 need to be enabled by OS.
  constexpr uint32_t kXCR0_AVX512_Bits = (0x3u << 1) | (0x7u << 5);
  bool avx512EnabledByOS = (xcr0.eax & kXCR0_AVX512_Bits) == kXCR0_AVX512_Bits;
#endif

  // CPUID EAX=7 ECX=0
  // -----------------

  // Detect new features if the processor supports CPUID-07.
  bool maybeMPX = false;

  if (maxId >= 0x7) {
    cpuidQuery(&regs, 0x7);

    maybeMPX = bitTest(regs.ebx, 14);
    // EAX of sub-leaf 0 reports the highest supported sub-leaf of CPUID-07.
    maxSubLeafId_0x7 = regs.eax;

    features.addIf(bitTest(regs.ebx,  0), CpuFeatures::X86::kFSGSBASE);
    features.addIf(bitTest(regs.ebx,  3), CpuFeatures::X86::kBMI);
    features.addIf(bitTest(regs.ebx,  4), CpuFeatures::X86::kHLE);
    features.addIf(bitTest(regs.ebx,  7), CpuFeatures::X86::kSMEP);
    features.addIf(bitTest(regs.ebx,  8), CpuFeatures::X86::kBMI2);
    features.addIf(bitTest(regs.ebx,  9), CpuFeatures::X86::kERMS);
    features.addIf(bitTest(regs.ebx, 11), CpuFeatures::X86::kRTM);
    features.addIf(bitTest(regs.ebx, 18), CpuFeatures::X86::kRDSEED);
    features.addIf(bitTest(regs.ebx, 19), CpuFeatures::X86::kADX);
    features.addIf(bitTest(regs.ebx, 20), CpuFeatures::X86::kSMAP);
    features.addIf(bitTest(regs.ebx, 23), CpuFeatures::X86::kCLFLUSHOPT);
    features.addIf(bitTest(regs.ebx, 24), CpuFeatures::X86::kCLWB);
    features.addIf(bitTest(regs.ebx, 29), CpuFeatures::X86::kSHA);
    features.addIf(bitTest(regs.ecx,  0), CpuFeatures::X86::kPREFETCHWT1);
    features.addIf(bitTest(regs.ecx,  4), CpuFeatures::X86::kOSPKE);
    features.addIf(bitTest(regs.ecx,  5), CpuFeatures::X86::kWAITPKG);
    features.addIf(bitTest(regs.ecx,  7), CpuFeatures::X86::kCET_SS);
    features.addIf(bitTest(regs.ecx,  8), CpuFeatures::X86::kGFNI);
    features.addIf(bitTest(regs.ecx,  9), CpuFeatures::X86::kVAES);
    features.addIf(bitTest(regs.ecx, 10), CpuFeatures::X86::kVPCLMULQDQ);
    features.addIf(bitTest(regs.ecx, 22), CpuFeatures::X86::kRDPID);
    features.addIf(bitTest(regs.ecx, 25), CpuFeatures::X86::kCLDEMOTE);
    features.addIf(bitTest(regs.ecx, 27), CpuFeatures::X86::kMOVDIRI);
    features.addIf(bitTest(regs.ecx, 28), CpuFeatures::X86::kMOVDIR64B);
    features.addIf(bitTest(regs.ecx, 29), CpuFeatures::X86::kENQCMD);
    features.addIf(bitTest(regs.edx,  5), CpuFeatures::X86::kUINTR);
    features.addIf(bitTest(regs.edx, 14), CpuFeatures::X86::kSERIALIZE);
    features.addIf(bitTest(regs.edx, 16), CpuFeatures::X86::kTSXLDTRK);
    features.addIf(bitTest(regs.edx, 18), CpuFeatures::X86::kPCONFIG);
    features.addIf(bitTest(regs.edx, 20), CpuFeatures::X86::kCET_IBT);

    // Detect 'TSX' - Requires at least one of `HLE` and `RTM` features.
    if (features.hasHLE() || features.hasRTM())
      features.add(CpuFeatures::X86::kTSX);

    // Detect 'AVX2' - Requires AVX as well.
    if (bitTest(regs.ebx, 5) && features.hasAVX())
      features.add(CpuFeatures::X86::kAVX2);

    // Detect 'AVX512'.
    if (avx512EnabledByOS && bitTest(regs.ebx, 16)) {
      features.add(CpuFeatures::X86::kAVX512_F);

      features.addIf(bitTest(regs.ebx, 17), CpuFeatures::X86::kAVX512_DQ);
      features.addIf(bitTest(regs.ebx, 21), CpuFeatures::X86::kAVX512_IFMA);
      features.addIf(bitTest(regs.ebx, 26), CpuFeatures::X86::kAVX512_PFI);
      features.addIf(bitTest(regs.ebx, 27), CpuFeatures::X86::kAVX512_ERI);
      features.addIf(bitTest(regs.ebx, 28), CpuFeatures::X86::kAVX512_CDI);
      features.addIf(bitTest(regs.ebx, 30), CpuFeatures::X86::kAVX512_BW);
      features.addIf(bitTest(regs.ebx, 31), CpuFeatures::X86::kAVX512_VL);
      features.addIf(bitTest(regs.ecx,  1), CpuFeatures::X86::kAVX512_VBMI);
      features.addIf(bitTest(regs.ecx,  6), CpuFeatures::X86::kAVX512_VBMI2);
      features.addIf(bitTest(regs.ecx, 11), CpuFeatures::X86::kAVX512_VNNI);
      features.addIf(bitTest(regs.ecx, 12), CpuFeatures::X86::kAVX512_BITALG);
      features.addIf(bitTest(regs.ecx, 14), CpuFeatures::X86::kAVX512_VPOPCNTDQ);
      features.addIf(bitTest(regs.edx,  2), CpuFeatures::X86::kAVX512_4VNNIW);
      features.addIf(bitTest(regs.edx,  3), CpuFeatures::X86::kAVX512_4FMAPS);
      features.addIf(bitTest(regs.edx,  8), CpuFeatures::X86::kAVX512_VP2INTERSECT);
      features.addIf(bitTest(regs.edx, 23), CpuFeatures::X86::kAVX512_FP16);
    }

    // Detect 'AMX'.
    if (amxEnabledByOS) {
      features.addIf(bitTest(regs.edx, 22), CpuFeatures::X86::kAMX_BF16);
      features.addIf(bitTest(regs.edx, 24), CpuFeatures::X86::kAMX_TILE);
      features.addIf(bitTest(regs.edx, 25), CpuFeatures::X86::kAMX_INT8);
    }
  }

  // CPUID EAX=7 ECX=1
  // -----------------

  // NOTE(review): AVX_VNNI is a VEX-encoded extension, yet it is only queried when AVX512_F was detected -
  // confirm whether this gate is intended before relaxing it.
  if (features.hasAVX512_F() && maxSubLeafId_0x7 >= 1) {
    cpuidQuery(&regs, 0x7, 1);

    // AVX_VNNI is reported by EAX bit 4 of this sub-leaf.
    features.addIf(bitTest(regs.eax,  4), CpuFeatures::X86::kAVX_VNNI);
    features.addIf(bitTest(regs.eax,  5), CpuFeatures::X86::kAVX512_BF16);
    features.addIf(bitTest(regs.eax, 22), CpuFeatures::X86::kHRESET);
  }

  // CPUID EAX=13 ECX=0
  // ------------------

  if (maxId >= 0xD) {
    cpuidQuery(&regs, 0xD, 0);

    // Both CPUID result and XCR0 has to be enabled to have support for MPX (bits 3 and 4).
    if (((regs.eax & xcr0.eax) & 0x00000018u) == 0x00000018u && maybeMPX)
      features.add(CpuFeatures::X86::kMPX);

    cpuidQuery(&regs, 0xD, 1);

    features.addIf(bitTest(regs.eax, 0), CpuFeatures::X86::kXSAVEOPT);
    features.addIf(bitTest(regs.eax, 1), CpuFeatures::X86::kXSAVEC);
    features.addIf(bitTest(regs.eax, 3), CpuFeatures::X86::kXSAVES);
  }

  // CPUID EAX=0x14 ECX=0
  // --------------------

  // PTWRITE is enumerated by leaf 14H (hexadecimal), not by leaf 14 decimal (0xE), which is reserved.
  if (maxId >= 0x14) {
    cpuidQuery(&regs, 0x14, 0);

    features.addIf(bitTest(regs.ebx, 4), CpuFeatures::X86::kPTWRITE);
  }

  // CPUID EAX=0x80000000...maxId
  // ----------------------------

  maxId = 0x80000000u;
  uint32_t i = maxId;

  // The highest EAX that we understand.
  constexpr uint32_t kHighestProcessedEAX = 0x8000001Fu;

  // Several CPUID calls are required to get the whole brand string. It's easier
  // to copy one DWORD at a time instead of copying the string a byte by byte.
  uint32_t* brand = cpu._brand.u32;
  do {
    cpuidQuery(&regs, i);
    switch (i) {
      case 0x80000000u:
        // EAX holds the highest extended leaf; clamp it so the loop never goes past what is handled here.
        maxId = Support::min<uint32_t>(regs.eax, kHighestProcessedEAX);
        break;

      case 0x80000001u:
        features.addIf(bitTest(regs.ecx,  0), CpuFeatures::X86::kLAHFSAHF);
        features.addIf(bitTest(regs.ecx,  2), CpuFeatures::X86::kSVM);
        features.addIf(bitTest(regs.ecx,  5), CpuFeatures::X86::kLZCNT);
        features.addIf(bitTest(regs.ecx,  6), CpuFeatures::X86::kSSE4A);
        features.addIf(bitTest(regs.ecx,  7), CpuFeatures::X86::kMSSE);
        features.addIf(bitTest(regs.ecx,  8), CpuFeatures::X86::kPREFETCHW);
        features.addIf(bitTest(regs.ecx, 12), CpuFeatures::X86::kSKINIT);
        features.addIf(bitTest(regs.ecx, 15), CpuFeatures::X86::kLWP);
        features.addIf(bitTest(regs.ecx, 21), CpuFeatures::X86::kTBM);
        features.addIf(bitTest(regs.ecx, 29), CpuFeatures::X86::kMONITORX);
        features.addIf(bitTest(regs.edx, 20), CpuFeatures::X86::kNX);
        features.addIf(bitTest(regs.edx, 21), CpuFeatures::X86::kFXSROPT);
        features.addIf(bitTest(regs.edx, 22), CpuFeatures::X86::kMMX2);
        features.addIf(bitTest(regs.edx, 27), CpuFeatures::X86::kRDTSCP);
        features.addIf(bitTest(regs.edx, 29), CpuFeatures::X86::kPREFETCHW);
        features.addIf(bitTest(regs.edx, 30), CpuFeatures::X86::k3DNOW2, CpuFeatures::X86::kMMX2);
        features.addIf(bitTest(regs.edx, 31), CpuFeatures::X86::kPREFETCHW);

        if (features.hasAVX()) {
          features.addIf(bitTest(regs.ecx, 11), CpuFeatures::X86::kXOP);
          features.addIf(bitTest(regs.ecx, 16), CpuFeatures::X86::kFMA4);
        }

        // This feature seems to be only supported by AMD.
        if (cpu.isVendor("AMD")) {
          features.addIf(bitTest(regs.ecx, 4), CpuFeatures::X86::kALTMOVCR8);
        }
        break;

      case 0x80000002u:
      case 0x80000003u:
      case 0x80000004u:
        // Each of these three leaves contributes 16 bytes of the brand string.
        *brand++ = regs.eax;
        *brand++ = regs.ebx;
        *brand++ = regs.ecx;
        *brand++ = regs.edx;

        // Go directly to the next one we are interested in.
        if (i == 0x80000004u)
          i = 0x80000008u - 1;
        break;

      case 0x80000008u:
        features.addIf(bitTest(regs.ebx,  0), CpuFeatures::X86::kCLZERO);
        // RDPRU is reported by EBX bit 4 (bit 0 is CLZERO).
        features.addIf(bitTest(regs.ebx,  4), CpuFeatures::X86::kRDPRU);
        features.addIf(bitTest(regs.ebx,  8), CpuFeatures::X86::kMCOMMIT);
        features.addIf(bitTest(regs.ebx,  9), CpuFeatures::X86::kWBNOINVD);

        // Go directly to the next one we are interested in.
        i = 0x8000001Fu - 1;
        break;

      case 0x8000001Fu:
        features.addIf(bitTest(regs.eax,  4), CpuFeatures::X86::kSNP);
        break;
    }
  } while (++i <= maxId);

  // Simplify CPU brand string a bit by removing some unnecessary spaces.
  simplifyCpuBrand(cpu._brand.str);
}
|
460
|
+
|
461
|
+
#endif // ASMJIT_ARCH_X86
|
462
|
+
|
463
|
+
// CpuInfo - Detect - ARM
|
464
|
+
// ======================
|
465
|
+
|
466
|
+
// The most relevant and accurate information can be found here:
|
467
|
+
// https://github.com/llvm-project/llvm/blob/master/lib/Target/AArch64/AArch64.td
|
468
|
+
// https://github.com/apple/llvm-project/blob/apple/main/llvm/lib/Target/AArch64/AArch64.td (Apple fork)
|
469
|
+
//
|
470
|
+
// Other resources:
|
471
|
+
// https://en.wikipedia.org/wiki/AArch64
|
472
|
+
// https://en.wikipedia.org/wiki/Apple_silicon#List_of_Apple_processors
|
473
|
+
// https://developer.arm.com/architectures/learn-the-architecture/understanding-the-armv8-x-extensions/single-page
|
474
|
+
|
475
|
+
#if ASMJIT_ARCH_ARM
|
476
|
+
|
477
|
+
// Adds the features that every CPU of the compile-time target architecture is assumed to have.
//
// 32-bit ARM builds add nothing, AArch64 builds add the ARMv8-A baseline.
static inline void populateBaseARMFeatures(CpuInfo& cpu) noexcept {
#if ASMJIT_ARCH_ARM == 32
  // No baseline flags at the moment.
  DebugUtils::unused(cpu);
#else
  // AArch64 is based on ARMv8-A and later.
  cpu.addFeature(CpuFeatures::ARM::kARMv6,
                 CpuFeatures::ARM::kARMv7,
                 CpuFeatures::ARM::kARMv8a);

  // AArch64 comes with these features by default.
  cpu.addFeature(CpuFeatures::ARM::kVFPv2,
                 CpuFeatures::ARM::kVFPv3,
                 CpuFeatures::ARM::kVFPv4,
                 CpuFeatures::ARM::kASIMD,
                 CpuFeatures::ARM::kIDIVA);
#endif
}
|
495
|
+
|
496
|
+
// Detects ARM version by macros defined at compile time. This means that AsmJit will report features forced at
|
497
|
+
// compile time that should always be provided by the target CPU. This also means that if we don't provide any
|
498
|
+
// means to detect CPU features the features reported by AsmJit will at least not report less features than the
|
499
|
+
// target it was compiled to.
|
500
|
+
ASMJIT_MAYBE_UNUSED
|
501
|
+
static ASMJIT_FAVOR_SIZE void detectARMFeaturesViaCompilerFlags(CpuInfo& cpu) noexcept {
|
502
|
+
DebugUtils::unused(cpu);
|
503
|
+
|
504
|
+
#if ASMJIT_ARCH_ARM == 32
|
505
|
+
|
506
|
+
// ARM targets have no baseline at the moment.
|
507
|
+
# if defined(__ARM_ARCH_7A__)
|
508
|
+
cpu.addFeature(CpuFeatures::ARM::kARMv7);
|
509
|
+
# endif
|
510
|
+
# if defined(__ARM_ARCH_8A__)
|
511
|
+
cpu.addFeature(CpuFeatures::ARM::kARMv8a);
|
512
|
+
# endif
|
513
|
+
|
514
|
+
# if defined(__TARGET_ARCH_THUMB)
|
515
|
+
cpu.addFeature(CpuFeatures::ARM::kTHUMB);
|
516
|
+
# if __TARGET_ARCH_THUMB >= 4
|
517
|
+
cpu.addFeature(CpuFeatures::ARM::kTHUMBv2);
|
518
|
+
# endif
|
519
|
+
# endif
|
520
|
+
|
521
|
+
# if defined(__ARM_FEATURE_FMA)
|
522
|
+
cpu.addFeature(CpuFeatures::ARM::kVFPv3);
|
523
|
+
cpu.addFeature(CpuFeatures::ARM::kVFPv4);
|
524
|
+
# endif
|
525
|
+
|
526
|
+
# if defined(__ARM_NEON)
|
527
|
+
cpu.addFeature(CpuFeatures::ARM::kASIMD);
|
528
|
+
# endif
|
529
|
+
|
530
|
+
# if defined(__ARM_FEATURE_IDIV) && defined(__TARGET_ARCH_THUMB)
|
531
|
+
cpu.addFeature(CpuFeatures::ARM::kIDIVT);
|
532
|
+
#endif
|
533
|
+
# if defined(__ARM_FEATURE_IDIV) && !defined(__TARGET_ARCH_THUMB)
|
534
|
+
cpu.addFeature(CpuFeatures::ARM::kIDIVA);
|
535
|
+
# endif
|
536
|
+
|
537
|
+
#endif
|
538
|
+
|
539
|
+
#if defined(__ARM_ARCH_8_1A__)
|
540
|
+
cpu.addFeature(CpuFeatures::ARM::kARMv8_1a);
|
541
|
+
#endif
|
542
|
+
#if defined(__ARM_ARCH_8_2A__)
|
543
|
+
cpu.addFeature(CpuFeatures::ARM::kARMv8_2a);
|
544
|
+
#endif
|
545
|
+
#if defined(__ARM_ARCH_8_3A__)
|
546
|
+
cpu.addFeature(CpuFeatures::ARM::kARMv8_3a);
|
547
|
+
#endif
|
548
|
+
#if defined(__ARM_ARCH_8_4A__)
|
549
|
+
cpu.addFeature(CpuFeatures::ARM::kARMv8_4a);
|
550
|
+
#endif
|
551
|
+
#if defined(__ARM_ARCH_8_5A__)
|
552
|
+
cpu.addFeature(CpuFeatures::ARM::kARMv8_5a);
|
553
|
+
#endif
|
554
|
+
#if defined(__ARM_ARCH_8_6A__)
|
555
|
+
cpu.addFeature(CpuFeatures::ARM::kARMv8_6a);
|
556
|
+
#endif
|
557
|
+
#if defined(__ARM_ARCH_8_7A__)
|
558
|
+
cpu.addFeature(CpuFeatures::ARM::kARMv8_7a);
|
559
|
+
#endif
|
560
|
+
|
561
|
+
#if defined(__ARM_FEATURE_AES)
|
562
|
+
cpu.addFeature(CpuFeatures::ARM::kAES);
|
563
|
+
#endif
|
564
|
+
|
565
|
+
#if defined(__ARM_FEATURE_BF16_SCALAR_ARITHMETIC) && defined(__ARM_FEATURE_BF16_VECTOR_ARITHMETIC)
|
566
|
+
cpu.addFeature(CpuFeatures::ARM::kBF16);
|
567
|
+
#endif
|
568
|
+
|
569
|
+
#if defined(__ARM_FEATURE_CRC32)
|
570
|
+
cpu.addFeature(CpuFeatures::ARM::kCRC32);
|
571
|
+
#endif
|
572
|
+
|
573
|
+
#if defined(__ARM_FEATURE_CRYPTO)
|
574
|
+
cpu.addFeature(CpuFeatures::ARM::kAES,
|
575
|
+
CpuFeatures::ARM::kSHA1,
|
576
|
+
CpuFeatures::ARM::kSHA2);
|
577
|
+
#endif
|
578
|
+
|
579
|
+
#if defined(__ARM_FEATURE_DOTPROD)
|
580
|
+
cpu.addFeature(CpuFeatures::ARM::kDOTPROD);
|
581
|
+
#endif
|
582
|
+
|
583
|
+
#if defined(__ARM_FEATURE_FP16FML) || defined(__ARM_FEATURE_FP16_FML)
|
584
|
+
cpu.addFeature(CpuFeatures::ARM::kFP16FML);
|
585
|
+
#endif
|
586
|
+
|
587
|
+
#if defined(__ARM_FEATURE_FP16_SCALAR_ARITHMETIC)
|
588
|
+
cpu.addFeature(CpuFeatures::ARM::kFP16FULL);
|
589
|
+
#endif
|
590
|
+
|
591
|
+
#if defined(__ARM_FEATURE_FRINT)
|
592
|
+
cpu.addFeature(CpuFeatures::ARM::kFRINT);
|
593
|
+
#endif
|
594
|
+
|
595
|
+
#if defined(__ARM_FEATURE_JCVT)
|
596
|
+
cpu.addFeature(CpuFeatures::ARM::kFJCVTZS);
|
597
|
+
#endif
|
598
|
+
|
599
|
+
#if defined(__ARM_FEATURE_MATMUL_INT8)
|
600
|
+
cpu.addFeature(CpuFeatures::ARM::kI8MM);
|
601
|
+
#endif
|
602
|
+
|
603
|
+
#if defined(__ARM_FEATURE_ATOMICS)
|
604
|
+
cpu.addFeature(CpuFeatures::ARM::kLSE);
|
605
|
+
#endif
|
606
|
+
|
607
|
+
#if defined(__ARM_FEATURE_MEMORY_TAGGING)
|
608
|
+
cpu.addFeature(CpuFeatures::ARM::kMTE);
|
609
|
+
#endif
|
610
|
+
|
611
|
+
#if defined(__ARM_FEATURE_QRDMX)
|
612
|
+
cpu.addFeature(CpuFeatures::ARM::kRDM);
|
613
|
+
#endif
|
614
|
+
|
615
|
+
#if defined(__ARM_FEATURE_RNG)
|
616
|
+
cpu.addFeature(CpuFeatures::ARM::kRNG);
|
617
|
+
#endif
|
618
|
+
|
619
|
+
#if defined(__ARM_FEATURE_SHA2)
|
620
|
+
cpu.addFeature(CpuFeatures::ARM::kSHA2);
|
621
|
+
#endif
|
622
|
+
|
623
|
+
#if defined(__ARM_FEATURE_SHA3)
|
624
|
+
cpu.addFeature(CpuFeatures::ARM::kSHA3);
|
625
|
+
#endif
|
626
|
+
|
627
|
+
#if defined(__ARM_FEATURE_SHA512)
|
628
|
+
cpu.addFeature(CpuFeatures::ARM::kSHA512);
|
629
|
+
#endif
|
630
|
+
|
631
|
+
#if defined(__ARM_FEATURE_SM3)
|
632
|
+
cpu.addFeature(CpuFeatures::ARM::kSM3);
|
633
|
+
#endif
|
634
|
+
|
635
|
+
#if defined(__ARM_FEATURE_SM4)
|
636
|
+
cpu.addFeature(CpuFeatures::ARM::kSM4);
|
637
|
+
#endif
|
638
|
+
|
639
|
+
#if defined(__ARM_FEATURE_SVE) || defined(__ARM_FEATURE_SVE_VECTOR_OPERATORS)
|
640
|
+
cpu.addFeature(CpuFeatures::ARM::kSVE);
|
641
|
+
#endif
|
642
|
+
|
643
|
+
#if defined(__ARM_FEATURE_SVE_MATMUL_INT8)
|
644
|
+
cpu.addFeature(CpuFeatures::ARM::kSVE_I8MM);
|
645
|
+
#endif
|
646
|
+
|
647
|
+
#if defined(__ARM_FEATURE_SVE_MATMUL_FP32)
|
648
|
+
cpu.addFeature(CpuFeatures::ARM::kSVE_F32MM);
|
649
|
+
#endif
|
650
|
+
|
651
|
+
#if defined(__ARM_FEATURE_SVE_MATMUL_FP64)
|
652
|
+
cpu.addFeature(CpuFeatures::ARM::kSVE_F64MM);
|
653
|
+
#endif
|
654
|
+
|
655
|
+
#if defined(__ARM_FEATURE_SVE2)
|
656
|
+
cpu.addFeature(CpuFeatures::ARM::kSVE2);
|
657
|
+
#endif
|
658
|
+
|
659
|
+
#if defined(__ARM_FEATURE_SVE2_AES)
|
660
|
+
cpu.addFeature(CpuFeatures::ARM::kSVE2_AES);
|
661
|
+
#endif
|
662
|
+
|
663
|
+
#if defined(__ARM_FEATURE_SVE2_BITPERM)
|
664
|
+
cpu.addFeature(CpuFeatures::ARM::kSVE2_BITPERM);
|
665
|
+
#endif
|
666
|
+
|
667
|
+
#if defined(__ARM_FEATURE_SVE2_SHA3)
|
668
|
+
cpu.addFeature(CpuFeatures::ARM::kSVE2_SHA3);
|
669
|
+
#endif
|
670
|
+
|
671
|
+
#if defined(__ARM_FEATURE_SVE2_SM4)
|
672
|
+
cpu.addFeature(CpuFeatures::ARM::kSVE2_SM4);
|
673
|
+
#endif
|
674
|
+
|
675
|
+
#if defined(__ARM_FEATURE_TME)
|
676
|
+
cpu.addFeature(CpuFeatures::ARM::kTME);
|
677
|
+
#endif
|
678
|
+
}
|
679
|
+
|
680
|
+
// Expands the feature set of `cpu` by everything implied by the ARMv8.x-A versions it already reports.
//
// Versions are checked from the newest to the oldest and each version adds its predecessor, so a single pass is
// enough to cascade from the reported version all the way down to the ARMv8-A baseline.
ASMJIT_MAYBE_UNUSED
static ASMJIT_FAVOR_SIZE void expandARMFeaturesByVersion(CpuInfo& cpu) noexcept {
  CpuFeatures::ARM& features = cpu.features().arm();

  if (features.hasARMv8_7a()) {
    features.add(CpuFeatures::ARM::kARMv8_6a);
  }

  if (features.hasARMv8_6a()) {
    // Int8 matrix multiplication (I8MM) is mandatory since ARMv8.6-A, the same as BF16.
    features.add(CpuFeatures::ARM::kARMv8_5a,
                 CpuFeatures::ARM::kBF16,
                 CpuFeatures::ARM::kI8MM);

    // The SVE variant of I8MM is only implied when SVE itself has been reported.
    if (features.hasSVE())
      features.add(CpuFeatures::ARM::kSVE_I8MM);
  }

  if (features.hasARMv8_5a()) {
    features.add(CpuFeatures::ARM::kARMv8_4a,
                 CpuFeatures::ARM::kALTNZCV,
                 CpuFeatures::ARM::kBTI,
                 CpuFeatures::ARM::kFRINT,
                 CpuFeatures::ARM::kSB,
                 CpuFeatures::ARM::kSSBS);
  }

  if (features.hasARMv8_4a()) {
    features.add(CpuFeatures::ARM::kARMv8_3a,
                 CpuFeatures::ARM::kDIT,
                 CpuFeatures::ARM::kDOTPROD,
                 CpuFeatures::ARM::kFLAGM,
                 CpuFeatures::ARM::kPMU,
                 CpuFeatures::ARM::kRCPC_IMMO);
  }

  if (features.hasARMv8_3a()) {
    features.add(CpuFeatures::ARM::kARMv8_2a,
                 CpuFeatures::ARM::kFCMA,
                 CpuFeatures::ARM::kFJCVTZS);
  }

  if (features.hasARMv8_2a()) {
    features.add(CpuFeatures::ARM::kARMv8_1a);
  }

  if (features.hasARMv8_1a()) {
    features.add(CpuFeatures::ARM::kARMv8a,
                 CpuFeatures::ARM::kCRC32,
                 CpuFeatures::ARM::kLSE,
                 CpuFeatures::ARM::kRDM);
  }

  if (features.hasARMv8a()) {
    features.add(CpuFeatures::ARM::kARMv7,
                 CpuFeatures::ARM::kVFPv2,
                 CpuFeatures::ARM::kVFPv3,
                 CpuFeatures::ARM::kVFPv4,
                 CpuFeatures::ARM::kVFP_D32,
                 CpuFeatures::ARM::kASIMD,
                 CpuFeatures::ARM::kIDIVA);
  }
}
|
741
|
+
|
742
|
+
// CpuInfo - Detect - ARM [Windows]
|
743
|
+
// ================================
|
744
|
+
|
745
|
+
#if defined(_WIN32)
|
746
|
+
struct WinPFPMapping {
|
747
|
+
uint8_t featureId;
|
748
|
+
uint8_t pfpFeatureId;
|
749
|
+
};
|
750
|
+
|
751
|
+
static ASMJIT_FAVOR_SIZE void detectPFPFeatures(CpuInfo& cpu, const WinPFPMapping* mapping, size_t size) noexcept {
|
752
|
+
for (size_t i = 0; i < size; i++)
|
753
|
+
if (::IsProcessorFeaturePresent(mapping[i].pfpFeatureId))
|
754
|
+
cpu.addFeature(mapping[i].featureId);
|
755
|
+
}
|
756
|
+
|
757
|
+
//! Detect ARM CPU features on Windows.
|
758
|
+
//!
|
759
|
+
//! The detection is based on `IsProcessorFeaturePresent()` API call.
|
760
|
+
static ASMJIT_FAVOR_SIZE void detectARMCpu(CpuInfo& cpu) noexcept {
|
761
|
+
cpu._wasDetected = true;
|
762
|
+
populateBaseARMFeatures(cpu);
|
763
|
+
|
764
|
+
CpuFeatures::ARM& features = cpu.features().arm();
|
765
|
+
|
766
|
+
// Win32 for ARM requires ARMv7 with DSP extensions, VFPv3, and uses THUMBv2 by default.
|
767
|
+
#if ASMJIT_ARCH_ARM == 32
|
768
|
+
features.add(CpuFeatures::ARM::kTHUMB);
|
769
|
+
features.add(CpuFeatures::ARM::kTHUMBv2);
|
770
|
+
features.add(CpuFeatures::ARM::kARMv6);
|
771
|
+
features.add(CpuFeatures::ARM::kARMv7);
|
772
|
+
features.add(CpuFeatures::ARM::kEDSP);
|
773
|
+
features.add(CpuFeatures::ARM::kVFPv2);
|
774
|
+
features.add(CpuFeatures::ARM::kVFPv3);
|
775
|
+
#endif
|
776
|
+
|
777
|
+
// Windows for ARM requires ASIMD.
|
778
|
+
features.add(CpuFeatures::ARM::kASIMD);
|
779
|
+
|
780
|
+
// Detect additional CPU features by calling `IsProcessorFeaturePresent()`.
|
781
|
+
static const WinPFPMapping mapping[] = {
|
782
|
+
#if ASMJIT_ARCH_ARM == 32
|
783
|
+
{ uint8_t(CpuFeatures::ARM::kVFP_D32) , 18 }, // PF_ARM_VFP_32_REGISTERS_AVAILABLE
|
784
|
+
{ uint8_t(CpuFeatures::ARM::kIDIVT) , 24 }, // PF_ARM_DIVIDE_INSTRUCTION_AVAILABLE
|
785
|
+
{ uint8_t(CpuFeatures::ARM::kVFPv4) , 27 }, // PF_ARM_FMAC_INSTRUCTIONS_AVAILABLE
|
786
|
+
{ uint8_t(CpuFeatures::ARM::kARMv8a) , 29 }, // PF_ARM_V8_INSTRUCTIONS_AVAILABLE
|
787
|
+
#endif
|
788
|
+
{ uint8_t(CpuFeatures::ARM::kAES) , 30 }, // PF_ARM_V8_CRYPTO_INSTRUCTIONS_AVAILABLE
|
789
|
+
{ uint8_t(CpuFeatures::ARM::kCRC32) , 31 }, // PF_ARM_V8_CRC32_INSTRUCTIONS_AVAILABLE
|
790
|
+
{ uint8_t(CpuFeatures::ARM::kLSE) , 34 } // PF_ARM_V81_ATOMIC_INSTRUCTIONS_AVAILABLE
|
791
|
+
|
792
|
+
};
|
793
|
+
detectPFPFeatures(cpu, mapping, ASMJIT_ARRAY_SIZE(mapping));
|
794
|
+
|
795
|
+
// Windows provides several instructions under a single flag:
|
796
|
+
if (features.hasAES()) {
|
797
|
+
features.add(CpuFeatures::ARM::kSHA1,
|
798
|
+
CpuFeatures::ARM::kSHA2);
|
799
|
+
}
|
800
|
+
|
801
|
+
expandARMFeaturesByVersion(cpu);
|
802
|
+
}
|
803
|
+
|
804
|
+
// CpuInfo - Detect - ARM [Linux]
|
805
|
+
// ==============================
|
806
|
+
|
807
|
+
#elif defined(__linux__)
|
808
|
+
|
809
|
+
struct LinuxHWCapMapping {
|
810
|
+
uint8_t featureId;
|
811
|
+
uint8_t hwCapBit;
|
812
|
+
};
|
813
|
+
|
814
|
+
static ASMJIT_FAVOR_SIZE void detectHWCaps(CpuInfo& cpu, unsigned long type, const LinuxHWCapMapping* mapping, size_t size) noexcept {
|
815
|
+
unsigned long mask = getauxval(type);
|
816
|
+
for (size_t i = 0; i < size; i++)
|
817
|
+
cpu.features().addIf(Support::bitTest(mask, mapping[i].hwCapBit), mapping[i].featureId);
|
818
|
+
}
|
819
|
+
|
820
|
+
#if ASMJIT_ARCH_ARM == 32
|
821
|
+
|
822
|
+
// `AT_HWCAP` provides ARMv7 (and less) related flags.
|
823
|
+
static const LinuxHWCapMapping hwCapMapping[] = {
|
824
|
+
{ uint8_t(CpuFeatures::ARM::kVFPv2) , 6 }, // HWCAP_VFP
|
825
|
+
{ uint8_t(CpuFeatures::ARM::kEDSP) , 7 }, // HWCAP_EDSP
|
826
|
+
{ uint8_t(CpuFeatures::ARM::kASIMD) , 12 }, // HWCAP_NEON
|
827
|
+
{ uint8_t(CpuFeatures::ARM::kVFPv3) , 13 }, // HWCAP_VFPv3
|
828
|
+
{ uint8_t(CpuFeatures::ARM::kVFPv4) , 16 }, // HWCAP_VFPv4
|
829
|
+
{ uint8_t(CpuFeatures::ARM::kIDIVA) , 17 }, // HWCAP_IDIVA
|
830
|
+
{ uint8_t(CpuFeatures::ARM::kIDIVT) , 18 }, // HWCAP_IDIVT
|
831
|
+
{ uint8_t(CpuFeatures::ARM::kVFP_D32) , 19 } // HWCAP_VFPD32
|
832
|
+
};
|
833
|
+
|
834
|
+
// `AT_HWCAP2` provides ARMv8+ related flags.
|
835
|
+
static const LinuxHWCapMapping hwCap2Mapping[] = {
|
836
|
+
{ uint8_t(CpuFeatures::ARM::kAES) , 0 }, // HWCAP2_AES
|
837
|
+
{ uint8_t(CpuFeatures::ARM::kPMULL) , 1 }, // HWCAP2_PMULL
|
838
|
+
{ uint8_t(CpuFeatures::ARM::kSHA1) , 2 }, // HWCAP2_SHA1
|
839
|
+
{ uint8_t(CpuFeatures::ARM::kSHA2) , 3 }, // HWCAP2_SHA2
|
840
|
+
{ uint8_t(CpuFeatures::ARM::kCRC32) , 4 } // HWCAP2_CRC32
|
841
|
+
};
|
842
|
+
|
843
|
+
// Detects ARM CPU features on 32-bit ARM Linux.
//
// Individual features come from the `AT_HWCAP` and `AT_HWCAP2` masks, architecture versions are then inferred from
// the features that were found.
static ASMJIT_FAVOR_SIZE void detectARMCpu(CpuInfo& cpu) noexcept {
  cpu._wasDetected = true;

  populateBaseARMFeatures(cpu);

  CpuFeatures::ARM& features = cpu.features().arm();

  detectHWCaps(cpu, AT_HWCAP, hwCapMapping, ASMJIT_ARRAY_SIZE(hwCapMapping));
  detectHWCaps(cpu, AT_HWCAP2, hwCap2Mapping, ASMJIT_ARRAY_SIZE(hwCap2Mapping));

  const bool hasVFPv3 = features.hasVFPv3();

  // VFPv3 implies VFPv2.
  if (hasVFPv3) {
    features.add(CpuFeatures::ARM::kVFPv2);
  }

  // VFPv2 implies ARMv6 (this check has to follow the one above as it may have just added VFPv2).
  if (features.hasVFPv2()) {
    features.add(CpuFeatures::ARM::kARMv6);
  }

  // ARMv7 provides VFPv3|ASIMD.
  if (hasVFPv3 || features.hasASIMD()) {
    features.add(CpuFeatures::ARM::kARMv7);
  }

  // ARMv8 provides AES, CRC32, PMULL, SHA1, and SHA2 - any of them means an ARMv8-A CPU.
  const bool hasAnyARMv8Feature = features.hasAES() ||
                                  features.hasCRC32() ||
                                  features.hasPMULL() ||
                                  features.hasSHA1() ||
                                  features.hasSHA2();
  if (hasAnyARMv8Feature) {
    features.add(CpuFeatures::ARM::kARMv8a);
  }
}
|
869
|
+
|
870
|
+
#else
|
871
|
+
|
872
|
+
// `AT_HWCAP` provides ARMv8+ related flags.
|
873
|
+
static const LinuxHWCapMapping hwCapMapping[] = {
|
874
|
+
/*
|
875
|
+
{ uint8_t(CpuFeatures::ARM::k) , 0 }, // HWCAP_FP
|
876
|
+
*/
|
877
|
+
{ uint8_t(CpuFeatures::ARM::kASIMD) , 1 }, // HWCAP_ASIMD
|
878
|
+
/*
|
879
|
+
{ uint8_t(CpuFeatures::ARM::k) , 2 }, // HWCAP_EVTSTRM
|
880
|
+
*/
|
881
|
+
{ uint8_t(CpuFeatures::ARM::kAES) , 3 }, // HWCAP_AES
|
882
|
+
{ uint8_t(CpuFeatures::ARM::kPMULL) , 4 }, // HWCAP_PMULL
|
883
|
+
{ uint8_t(CpuFeatures::ARM::kSHA1) , 5 }, // HWCAP_SHA1
|
884
|
+
{ uint8_t(CpuFeatures::ARM::kSHA2) , 6 }, // HWCAP_SHA2
|
885
|
+
{ uint8_t(CpuFeatures::ARM::kCRC32) , 7 }, // HWCAP_CRC32
|
886
|
+
{ uint8_t(CpuFeatures::ARM::kLSE) , 8 }, // HWCAP_ATOMICS
|
887
|
+
{ uint8_t(CpuFeatures::ARM::kFP16CONV) , 9 }, // HWCAP_FPHP
|
888
|
+
{ uint8_t(CpuFeatures::ARM::kFP16FULL) , 10 }, // HWCAP_ASIMDHP
|
889
|
+
{ uint8_t(CpuFeatures::ARM::kCPUID) , 11 }, // HWCAP_CPUID
|
890
|
+
{ uint8_t(CpuFeatures::ARM::kRDM) , 12 }, // HWCAP_ASIMDRDM
|
891
|
+
{ uint8_t(CpuFeatures::ARM::kFJCVTZS) , 13 }, // HWCAP_JSCVT
|
892
|
+
{ uint8_t(CpuFeatures::ARM::kFCMA) , 14 }, // HWCAP_FCMA
|
893
|
+
/*
|
894
|
+
{ uint8_t(CpuFeatures::ARM::k) , 15 }, // HWCAP_LRCPC
|
895
|
+
{ uint8_t(CpuFeatures::ARM::k) , 16 }, // HWCAP_DCPOP
|
896
|
+
*/
|
897
|
+
{ uint8_t(CpuFeatures::ARM::kSHA3) , 17 }, // HWCAP_SHA3
|
898
|
+
{ uint8_t(CpuFeatures::ARM::kSM3) , 18 }, // HWCAP_SM3
|
899
|
+
{ uint8_t(CpuFeatures::ARM::kSM4) , 19 }, // HWCAP_SM4
|
900
|
+
{ uint8_t(CpuFeatures::ARM::kDOTPROD) , 20 }, // HWCAP_ASIMDDP
|
901
|
+
{ uint8_t(CpuFeatures::ARM::kSHA512) , 21 }, // HWCAP_SHA512
|
902
|
+
{ uint8_t(CpuFeatures::ARM::kSVE) , 22 }, // HWCAP_SVE
|
903
|
+
{ uint8_t(CpuFeatures::ARM::kFP16FML) , 23 }, // HWCAP_ASIMDFHM
|
904
|
+
{ uint8_t(CpuFeatures::ARM::kDIT) , 24 }, // HWCAP_DIT
|
905
|
+
/*
|
906
|
+
{ uint8_t(CpuFeatures::ARM::k) , 25 }, // HWCAP_USCAT
|
907
|
+
{ uint8_t(CpuFeatures::ARM::k) , 26 }, // HWCAP_ILRCPC
|
908
|
+
*/
|
909
|
+
{ uint8_t(CpuFeatures::ARM::kFLAGM) , 27 }, // HWCAP_FLAGM
|
910
|
+
{ uint8_t(CpuFeatures::ARM::kSSBS) , 28 }, // HWCAP_SSBS
|
911
|
+
{ uint8_t(CpuFeatures::ARM::kSB) , 29 } // HWCAP_SB
|
912
|
+
/*
|
913
|
+
{ uint8_t(CpuFeatures::ARM::k) , 30 }, // HWCAP_PACA
|
914
|
+
{ uint8_t(CpuFeatures::ARM::k) , 31 } // HWCAP_PACG
|
915
|
+
*/
|
916
|
+
};
|
917
|
+
|
918
|
+
// `AT_HWCAP2` provides ARMv8+ related flags.
|
919
|
+
static const LinuxHWCapMapping hwCapMapping2[] = {
|
920
|
+
/*
|
921
|
+
{ uint8_t(CpuFeatures::ARM::k) , 0 }, // HWCAP2_DCPODP
|
922
|
+
*/
|
923
|
+
{ uint8_t(CpuFeatures::ARM::kSVE2) , 1 }, // HWCAP2_SVE2
|
924
|
+
{ uint8_t(CpuFeatures::ARM::kSVE2_AES) , 2 }, // HWCAP2_SVEAES
|
925
|
+
{ uint8_t(CpuFeatures::ARM::kSVE_PMULL) , 3 }, // HWCAP2_SVEPMULL
|
926
|
+
{ uint8_t(CpuFeatures::ARM::kSVE2_BITPERM), 4 }, // HWCAP2_SVEBITPERM
|
927
|
+
{ uint8_t(CpuFeatures::ARM::kSVE2_SHA3) , 5 }, // HWCAP2_SVESHA3
|
928
|
+
{ uint8_t(CpuFeatures::ARM::kSVE2_SM4) , 6 }, // HWCAP2_SVESM4
|
929
|
+
{ uint8_t(CpuFeatures::ARM::kALTNZCV) , 7 }, // HWCAP2_FLAGM2
|
930
|
+
{ uint8_t(CpuFeatures::ARM::kFRINT) , 8 }, // HWCAP2_FRINT
|
931
|
+
{ uint8_t(CpuFeatures::ARM::kSVE_I8MM) , 9 }, // HWCAP2_SVEI8MM
|
932
|
+
{ uint8_t(CpuFeatures::ARM::kSVE_F32MM) , 10 }, // HWCAP2_SVEF32MM
|
933
|
+
{ uint8_t(CpuFeatures::ARM::kSVE_F64MM) , 11 }, // HWCAP2_SVEF64MM
|
934
|
+
{ uint8_t(CpuFeatures::ARM::kSVE_BF16) , 12 }, // HWCAP2_SVEBF16
|
935
|
+
{ uint8_t(CpuFeatures::ARM::kI8MM) , 13 }, // HWCAP2_I8MM
|
936
|
+
{ uint8_t(CpuFeatures::ARM::kBF16) , 14 }, // HWCAP2_BF16
|
937
|
+
{ uint8_t(CpuFeatures::ARM::kDGH) , 15 }, // HWCAP2_DGH
|
938
|
+
{ uint8_t(CpuFeatures::ARM::kRNG) , 16 }, // HWCAP2_RNG
|
939
|
+
{ uint8_t(CpuFeatures::ARM::kBTI) , 17 }, // HWCAP2_BTI
|
940
|
+
{ uint8_t(CpuFeatures::ARM::kMTE) , 18 } // HWCAP2_MTE
|
941
|
+
};
|
942
|
+
|
943
|
+
// Detects ARM CPU features on AArch64 Linux.
//
// The AArch64 baseline is populated first, every other feature comes from the `AT_HWCAP` and `AT_HWCAP2` masks.
static ASMJIT_FAVOR_SIZE void detectARMCpu(CpuInfo& cpu) noexcept {
  cpu._wasDetected = true;
  populateBaseARMFeatures(cpu);

  // Features reported by `AT_HWCAP`.
  detectHWCaps(cpu, AT_HWCAP, hwCapMapping, ASMJIT_ARRAY_SIZE(hwCapMapping));

  // Features reported by `AT_HWCAP2`.
  detectHWCaps(cpu, AT_HWCAP2, hwCapMapping2, ASMJIT_ARRAY_SIZE(hwCapMapping2));
}
|
950
|
+
|
951
|
+
#endif
|
952
|
+
|
953
|
+
// CpuInfo - Detect - ARM [Apple]
|
954
|
+
// ==============================
|
955
|
+
|
956
|
+
#elif defined(__APPLE__)
|
957
|
+
|
958
|
+
// CPU family identifiers compared against the value of the "hw.cpufamily" sysctl on Apple platforms.
namespace AppleHWId {
  enum CpuFamily : uint32_t {
    // Generic ARM.
    kCpuFamily_ARM_9              = 0xE73283AEu,
    kCpuFamily_ARM_11             = 0x8FF620D8u,
    kCpuFamily_ARM_12             = 0xBD1B0AE9u,
    kCpuFamily_ARM_13             = 0x0CC90E64u,
    kCpuFamily_ARM_14             = 0x96077EF1u,
    kCpuFamily_ARM_15             = 0xA8511BCAu,

    // Apple design.
    kCpuFamily_SWIFT              = 0x1E2D6381u,
    kCpuFamily_CYCLONE            = 0x37A09642u,
    kCpuFamily_TYPHOON            = 0x2C91A47Eu,
    kCpuFamily_TWISTER            = 0x92FB37C8u,
    kCpuFamily_HURRICANE          = 0x67CEEE93u,
    kCpuFamily_MONSOON_MISTRAL    = 0xE81E7EF6u,
    kCpuFamily_VORTEX_TEMPEST     = 0x07D34B9Fu,
    kCpuFamily_LIGHTNING_THUNDER  = 0x462504D2u,
    kCpuFamily_FIRESTORM_ICESTORM = 0x1B588BB3u
  };
} // {AppleHWId}
|
980
|
+
|
981
|
+
// Queries the "hw.cpufamily" sysctl and returns its value, or zero when the query fails.
static ASMJIT_FAVOR_SIZE uint32_t queryARMCpuFamilyId() noexcept {
  uint32_t familyId = 0;
  size_t familyIdSize = sizeof(familyId);

  // `sysctlbyname()` signals success by returning zero.
  const bool succeeded = sysctlbyname("hw.cpufamily", &familyId, &familyIdSize, nullptr, 0) == 0;
  return succeeded ? familyId : uint32_t(0);
}
|
991
|
+
|
992
|
+
// Detects ARM CPU features on Apple platforms.
//
// No individual feature bits are queried here - the CPU family reported by the "hw.cpufamily" sysctl selects a
// hard-coded feature set, which is then expanded by the architecture version. An unrecognized family resets
// `_wasDetected` to false.
static ASMJIT_FAVOR_SIZE void detectARMCpu(CpuInfo& cpu) noexcept {
  cpu._wasDetected = true;
  populateBaseARMFeatures(cpu);

  CpuFeatures::ARM& features = cpu.features().arm();
  const uint32_t cpuFamilyId = queryARMCpuFamilyId();

  switch (cpuFamilyId) {
    // Generic ARM 9/11/12, ARM Cortex A8 (ARM_13), and ARM Cortex A9 (ARM_14) - recognized, but nothing to add.
    case AppleHWId::kCpuFamily_ARM_9:
    case AppleHWId::kCpuFamily_ARM_11:
    case AppleHWId::kCpuFamily_ARM_12:
    case AppleHWId::kCpuFamily_ARM_13:
    case AppleHWId::kCpuFamily_ARM_14:
      break;

    // ARM Cortex A7 - ARMv7k.
    // Apple A6/A6X  - ARMv7s.
    case AppleHWId::kCpuFamily_ARM_15:
    case AppleHWId::kCpuFamily_SWIFT:
      features.add(CpuFeatures::ARM::kARMv7);
      break;

    // Apple A7 - ARMv8.0-A.
    // Apple A8 - ARMv8.0-A.
    // Apple A9 - ARMv8.0-A.
    case AppleHWId::kCpuFamily_CYCLONE:
    case AppleHWId::kCpuFamily_TYPHOON:
    case AppleHWId::kCpuFamily_TWISTER:
      features.add(CpuFeatures::ARM::kARMv8a,
                   CpuFeatures::ARM::kAES,
                   CpuFeatures::ARM::kSHA1,
                   CpuFeatures::ARM::kSHA2);
      break;

    // Apple A10 - ARMv8.1-A.
    case AppleHWId::kCpuFamily_HURRICANE:
      features.add(CpuFeatures::ARM::kARMv8_1a,
                   CpuFeatures::ARM::kAES,
                   CpuFeatures::ARM::kRDM,
                   CpuFeatures::ARM::kSHA1,
                   CpuFeatures::ARM::kSHA2);
      break;

    // Apple A11 - ARMv8.2-A.
    case AppleHWId::kCpuFamily_MONSOON_MISTRAL:
      features.add(CpuFeatures::ARM::kARMv8_2a,
                   CpuFeatures::ARM::kAES,
                   CpuFeatures::ARM::kFP16FULL,
                   CpuFeatures::ARM::kSHA1,
                   CpuFeatures::ARM::kSHA2);
      break;

    // Apple A12 - ARMv8.3-A.
    case AppleHWId::kCpuFamily_VORTEX_TEMPEST:
      features.add(CpuFeatures::ARM::kARMv8_3a,
                   CpuFeatures::ARM::kAES,
                   CpuFeatures::ARM::kFP16FULL,
                   CpuFeatures::ARM::kSHA1,
                   CpuFeatures::ARM::kSHA2);
      break;

    // Apple A13 - ARMv8.4-A.
    case AppleHWId::kCpuFamily_LIGHTNING_THUNDER:
      features.add(CpuFeatures::ARM::kARMv8_4a,
                   CpuFeatures::ARM::kAES,
                   CpuFeatures::ARM::kFP16FML,
                   CpuFeatures::ARM::kFP16FULL,
                   CpuFeatures::ARM::kSHA1,
                   CpuFeatures::ARM::kSHA2,
                   CpuFeatures::ARM::kSHA3,
                   CpuFeatures::ARM::kSHA512);
      break;

    // Apple A14/M1 - ARMv8.5-A.
    //
    // NOTE(review): only `kARMv8_4a` is reported as the version, the ARMv8.5-A features this family gets are listed
    // explicitly - presumably because not every feature `expandARMFeaturesByVersion()` adds for ARMv8.5-A applies
    // to it. Confirm before changing the version.
    case AppleHWId::kCpuFamily_FIRESTORM_ICESTORM:
      features.add(CpuFeatures::ARM::kARMv8_4a,
                   CpuFeatures::ARM::kAES,
                   CpuFeatures::ARM::kALTNZCV,
                   CpuFeatures::ARM::kFP16FML,
                   CpuFeatures::ARM::kFP16FULL,
                   CpuFeatures::ARM::kFRINT,
                   CpuFeatures::ARM::kSB,
                   CpuFeatures::ARM::kSHA1,
                   CpuFeatures::ARM::kSHA2,
                   CpuFeatures::ARM::kSHA3,
                   CpuFeatures::ARM::kSHA512,
                   CpuFeatures::ARM::kSSBS);
      break;

    // Unknown CPU family (this includes a failed query, which yields zero).
    default:
      cpu._wasDetected = false;
      break;
  }

  expandARMFeaturesByVersion(cpu);
}
|
1110
|
+
|
1111
|
+
// CpuInfo - Detect - ARM [Unknown]
|
1112
|
+
// ================================
|
1113
|
+
|
1114
|
+
#else
|
1115
|
+
|
1116
|
+
#if ASMJIT_ARCH_ARM == 64
|
1117
|
+
#pragma message("[asmjit] Disabling runtime CPU detection - unsupported OS/CPU combination (Unknown OS with AArch64 CPU)")
|
1118
|
+
#else
|
1119
|
+
#pragma message("[asmjit] Disabling runtime CPU detection - unsupported OS/CPU combination (Unknown OS with ARM CPU)")
|
1120
|
+
#endif
|
1121
|
+
|
1122
|
+
// Fallback used when the operating system offers no supported way of runtime CPU detection.
//
// Only the architecture baseline and the features enabled at compile time are reported; `_wasDetected` is not
// touched.
static ASMJIT_FAVOR_SIZE void detectARMCpu(CpuInfo& cpu) noexcept {
  // Features guaranteed by the target architecture.
  populateBaseARMFeatures(cpu);

  // Features the compiler was told to use.
  detectARMFeaturesViaCompilerFlags(cpu);

  // Features implied by the architecture versions collected above.
  expandARMFeaturesByVersion(cpu);
}
|
1127
|
+
#endif
|
1128
|
+
|
1129
|
+
#endif
|
1130
|
+
|
1131
|
+
// CpuInfo - Detect - Host
|
1132
|
+
// =======================
|
1133
|
+
|
1134
|
+
static uint32_t cpuInfoInitialized;
|
1135
|
+
static CpuInfo cpuInfoGlobal(Globals::NoInit);
|
1136
|
+
|
1137
|
+
const CpuInfo& CpuInfo::host() noexcept {
|
1138
|
+
// This should never cause a problem as the resulting information should always be the same. In the worst case we
|
1139
|
+
// would just overwrite it non-atomically.
|
1140
|
+
if (!cpuInfoInitialized) {
|
1141
|
+
CpuInfo cpuInfoLocal;
|
1142
|
+
|
1143
|
+
cpuInfoLocal._arch = Arch::kHost;
|
1144
|
+
cpuInfoLocal._subArch = SubArch::kHost;
|
1145
|
+
|
1146
|
+
#if ASMJIT_ARCH_X86
|
1147
|
+
detectX86Cpu(cpuInfoLocal);
|
1148
|
+
#elif ASMJIT_ARCH_ARM
|
1149
|
+
detectARMCpu(cpuInfoLocal);
|
1150
|
+
#else
|
1151
|
+
#pragma message("[asmjit] Disabling runtime CPU detection - unsupported OS/CPU combination (Unknown CPU)")
|
1152
|
+
#endif
|
1153
|
+
|
1154
|
+
cpuInfoLocal._hwThreadCount = detectHWThreadCount();
|
1155
|
+
cpuInfoGlobal = cpuInfoLocal;
|
1156
|
+
cpuInfoInitialized = 1;
|
1157
|
+
}
|
1158
|
+
|
1159
|
+
return cpuInfoGlobal;
|
1160
|
+
}
|
1161
|
+
|
1162
|
+
ASMJIT_END_NAMESPACE
|