asmjit 0.2.0 → 0.2.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (201) hide show
  1. checksums.yaml +4 -4
  2. data/Gemfile.lock +1 -1
  3. data/asmjit.gemspec +1 -1
  4. data/ext/asmjit/asmjit/.editorconfig +10 -0
  5. data/ext/asmjit/asmjit/.github/FUNDING.yml +1 -0
  6. data/ext/asmjit/asmjit/.github/workflows/build-config.json +47 -0
  7. data/ext/asmjit/asmjit/.github/workflows/build.yml +156 -0
  8. data/ext/asmjit/asmjit/.gitignore +6 -0
  9. data/ext/asmjit/asmjit/CMakeLists.txt +611 -0
  10. data/ext/asmjit/asmjit/LICENSE.md +17 -0
  11. data/ext/asmjit/asmjit/README.md +69 -0
  12. data/ext/asmjit/asmjit/src/asmjit/a64.h +62 -0
  13. data/ext/asmjit/asmjit/src/asmjit/arm/a64archtraits_p.h +81 -0
  14. data/ext/asmjit/asmjit/src/asmjit/arm/a64assembler.cpp +5115 -0
  15. data/ext/asmjit/asmjit/src/asmjit/arm/a64assembler.h +72 -0
  16. data/ext/asmjit/asmjit/src/asmjit/arm/a64builder.cpp +51 -0
  17. data/ext/asmjit/asmjit/src/asmjit/arm/a64builder.h +57 -0
  18. data/ext/asmjit/asmjit/src/asmjit/arm/a64compiler.cpp +60 -0
  19. data/ext/asmjit/asmjit/src/asmjit/arm/a64compiler.h +247 -0
  20. data/ext/asmjit/asmjit/src/asmjit/arm/a64emithelper.cpp +464 -0
  21. data/ext/asmjit/asmjit/src/asmjit/arm/a64emithelper_p.h +50 -0
  22. data/ext/asmjit/asmjit/src/asmjit/arm/a64emitter.h +1228 -0
  23. data/ext/asmjit/asmjit/src/asmjit/arm/a64formatter.cpp +298 -0
  24. data/ext/asmjit/asmjit/src/asmjit/arm/a64formatter_p.h +59 -0
  25. data/ext/asmjit/asmjit/src/asmjit/arm/a64func.cpp +189 -0
  26. data/ext/asmjit/asmjit/src/asmjit/arm/a64func_p.h +33 -0
  27. data/ext/asmjit/asmjit/src/asmjit/arm/a64globals.h +1894 -0
  28. data/ext/asmjit/asmjit/src/asmjit/arm/a64instapi.cpp +278 -0
  29. data/ext/asmjit/asmjit/src/asmjit/arm/a64instapi_p.h +41 -0
  30. data/ext/asmjit/asmjit/src/asmjit/arm/a64instdb.cpp +1957 -0
  31. data/ext/asmjit/asmjit/src/asmjit/arm/a64instdb.h +74 -0
  32. data/ext/asmjit/asmjit/src/asmjit/arm/a64instdb_p.h +876 -0
  33. data/ext/asmjit/asmjit/src/asmjit/arm/a64operand.cpp +85 -0
  34. data/ext/asmjit/asmjit/src/asmjit/arm/a64operand.h +312 -0
  35. data/ext/asmjit/asmjit/src/asmjit/arm/a64rapass.cpp +852 -0
  36. data/ext/asmjit/asmjit/src/asmjit/arm/a64rapass_p.h +105 -0
  37. data/ext/asmjit/asmjit/src/asmjit/arm/a64utils.h +179 -0
  38. data/ext/asmjit/asmjit/src/asmjit/arm/armformatter.cpp +143 -0
  39. data/ext/asmjit/asmjit/src/asmjit/arm/armformatter_p.h +44 -0
  40. data/ext/asmjit/asmjit/src/asmjit/arm/armglobals.h +21 -0
  41. data/ext/asmjit/asmjit/src/asmjit/arm/armoperand.h +621 -0
  42. data/ext/asmjit/asmjit/src/asmjit/arm.h +62 -0
  43. data/ext/asmjit/asmjit/src/asmjit/asmjit-scope-begin.h +17 -0
  44. data/ext/asmjit/asmjit/src/asmjit/asmjit-scope-end.h +9 -0
  45. data/ext/asmjit/asmjit/src/asmjit/asmjit.h +33 -0
  46. data/ext/asmjit/asmjit/src/asmjit/core/api-build_p.h +55 -0
  47. data/ext/asmjit/asmjit/src/asmjit/core/api-config.h +613 -0
  48. data/ext/asmjit/asmjit/src/asmjit/core/archcommons.h +229 -0
  49. data/ext/asmjit/asmjit/src/asmjit/core/archtraits.cpp +160 -0
  50. data/ext/asmjit/asmjit/src/asmjit/core/archtraits.h +290 -0
  51. data/ext/asmjit/asmjit/src/asmjit/core/assembler.cpp +406 -0
  52. data/ext/asmjit/asmjit/src/asmjit/core/assembler.h +129 -0
  53. data/ext/asmjit/asmjit/src/asmjit/core/builder.cpp +889 -0
  54. data/ext/asmjit/asmjit/src/asmjit/core/builder.h +1391 -0
  55. data/ext/asmjit/asmjit/src/asmjit/core/codebuffer.h +113 -0
  56. data/ext/asmjit/asmjit/src/asmjit/core/codeholder.cpp +1149 -0
  57. data/ext/asmjit/asmjit/src/asmjit/core/codeholder.h +1035 -0
  58. data/ext/asmjit/asmjit/src/asmjit/core/codewriter.cpp +175 -0
  59. data/ext/asmjit/asmjit/src/asmjit/core/codewriter_p.h +179 -0
  60. data/ext/asmjit/asmjit/src/asmjit/core/compiler.cpp +582 -0
  61. data/ext/asmjit/asmjit/src/asmjit/core/compiler.h +737 -0
  62. data/ext/asmjit/asmjit/src/asmjit/core/compilerdefs.h +173 -0
  63. data/ext/asmjit/asmjit/src/asmjit/core/constpool.cpp +363 -0
  64. data/ext/asmjit/asmjit/src/asmjit/core/constpool.h +250 -0
  65. data/ext/asmjit/asmjit/src/asmjit/core/cpuinfo.cpp +1162 -0
  66. data/ext/asmjit/asmjit/src/asmjit/core/cpuinfo.h +813 -0
  67. data/ext/asmjit/asmjit/src/asmjit/core/emithelper.cpp +323 -0
  68. data/ext/asmjit/asmjit/src/asmjit/core/emithelper_p.h +58 -0
  69. data/ext/asmjit/asmjit/src/asmjit/core/emitter.cpp +333 -0
  70. data/ext/asmjit/asmjit/src/asmjit/core/emitter.h +741 -0
  71. data/ext/asmjit/asmjit/src/asmjit/core/emitterutils.cpp +129 -0
  72. data/ext/asmjit/asmjit/src/asmjit/core/emitterutils_p.h +89 -0
  73. data/ext/asmjit/asmjit/src/asmjit/core/environment.cpp +46 -0
  74. data/ext/asmjit/asmjit/src/asmjit/core/environment.h +508 -0
  75. data/ext/asmjit/asmjit/src/asmjit/core/errorhandler.cpp +14 -0
  76. data/ext/asmjit/asmjit/src/asmjit/core/errorhandler.h +228 -0
  77. data/ext/asmjit/asmjit/src/asmjit/core/formatter.cpp +584 -0
  78. data/ext/asmjit/asmjit/src/asmjit/core/formatter.h +247 -0
  79. data/ext/asmjit/asmjit/src/asmjit/core/formatter_p.h +34 -0
  80. data/ext/asmjit/asmjit/src/asmjit/core/func.cpp +286 -0
  81. data/ext/asmjit/asmjit/src/asmjit/core/func.h +1445 -0
  82. data/ext/asmjit/asmjit/src/asmjit/core/funcargscontext.cpp +293 -0
  83. data/ext/asmjit/asmjit/src/asmjit/core/funcargscontext_p.h +199 -0
  84. data/ext/asmjit/asmjit/src/asmjit/core/globals.cpp +133 -0
  85. data/ext/asmjit/asmjit/src/asmjit/core/globals.h +393 -0
  86. data/ext/asmjit/asmjit/src/asmjit/core/inst.cpp +113 -0
  87. data/ext/asmjit/asmjit/src/asmjit/core/inst.h +772 -0
  88. data/ext/asmjit/asmjit/src/asmjit/core/jitallocator.cpp +1242 -0
  89. data/ext/asmjit/asmjit/src/asmjit/core/jitallocator.h +261 -0
  90. data/ext/asmjit/asmjit/src/asmjit/core/jitruntime.cpp +80 -0
  91. data/ext/asmjit/asmjit/src/asmjit/core/jitruntime.h +89 -0
  92. data/ext/asmjit/asmjit/src/asmjit/core/logger.cpp +69 -0
  93. data/ext/asmjit/asmjit/src/asmjit/core/logger.h +198 -0
  94. data/ext/asmjit/asmjit/src/asmjit/core/misc_p.h +33 -0
  95. data/ext/asmjit/asmjit/src/asmjit/core/operand.cpp +132 -0
  96. data/ext/asmjit/asmjit/src/asmjit/core/operand.h +1611 -0
  97. data/ext/asmjit/asmjit/src/asmjit/core/osutils.cpp +84 -0
  98. data/ext/asmjit/asmjit/src/asmjit/core/osutils.h +61 -0
  99. data/ext/asmjit/asmjit/src/asmjit/core/osutils_p.h +68 -0
  100. data/ext/asmjit/asmjit/src/asmjit/core/raassignment_p.h +418 -0
  101. data/ext/asmjit/asmjit/src/asmjit/core/rabuilders_p.h +612 -0
  102. data/ext/asmjit/asmjit/src/asmjit/core/radefs_p.h +1204 -0
  103. data/ext/asmjit/asmjit/src/asmjit/core/ralocal.cpp +1166 -0
  104. data/ext/asmjit/asmjit/src/asmjit/core/ralocal_p.h +254 -0
  105. data/ext/asmjit/asmjit/src/asmjit/core/rapass.cpp +1969 -0
  106. data/ext/asmjit/asmjit/src/asmjit/core/rapass_p.h +1183 -0
  107. data/ext/asmjit/asmjit/src/asmjit/core/rastack.cpp +184 -0
  108. data/ext/asmjit/asmjit/src/asmjit/core/rastack_p.h +171 -0
  109. data/ext/asmjit/asmjit/src/asmjit/core/string.cpp +559 -0
  110. data/ext/asmjit/asmjit/src/asmjit/core/string.h +372 -0
  111. data/ext/asmjit/asmjit/src/asmjit/core/support.cpp +494 -0
  112. data/ext/asmjit/asmjit/src/asmjit/core/support.h +1773 -0
  113. data/ext/asmjit/asmjit/src/asmjit/core/target.cpp +14 -0
  114. data/ext/asmjit/asmjit/src/asmjit/core/target.h +53 -0
  115. data/ext/asmjit/asmjit/src/asmjit/core/type.cpp +74 -0
  116. data/ext/asmjit/asmjit/src/asmjit/core/type.h +419 -0
  117. data/ext/asmjit/asmjit/src/asmjit/core/virtmem.cpp +722 -0
  118. data/ext/asmjit/asmjit/src/asmjit/core/virtmem.h +242 -0
  119. data/ext/asmjit/asmjit/src/asmjit/core/zone.cpp +353 -0
  120. data/ext/asmjit/asmjit/src/asmjit/core/zone.h +615 -0
  121. data/ext/asmjit/asmjit/src/asmjit/core/zonehash.cpp +309 -0
  122. data/ext/asmjit/asmjit/src/asmjit/core/zonehash.h +186 -0
  123. data/ext/asmjit/asmjit/src/asmjit/core/zonelist.cpp +163 -0
  124. data/ext/asmjit/asmjit/src/asmjit/core/zonelist.h +209 -0
  125. data/ext/asmjit/asmjit/src/asmjit/core/zonestack.cpp +176 -0
  126. data/ext/asmjit/asmjit/src/asmjit/core/zonestack.h +239 -0
  127. data/ext/asmjit/asmjit/src/asmjit/core/zonestring.h +120 -0
  128. data/ext/asmjit/asmjit/src/asmjit/core/zonetree.cpp +99 -0
  129. data/ext/asmjit/asmjit/src/asmjit/core/zonetree.h +380 -0
  130. data/ext/asmjit/asmjit/src/asmjit/core/zonevector.cpp +356 -0
  131. data/ext/asmjit/asmjit/src/asmjit/core/zonevector.h +690 -0
  132. data/ext/asmjit/asmjit/src/asmjit/core.h +1861 -0
  133. data/ext/asmjit/asmjit/src/asmjit/x86/x86archtraits_p.h +148 -0
  134. data/ext/asmjit/asmjit/src/asmjit/x86/x86assembler.cpp +5110 -0
  135. data/ext/asmjit/asmjit/src/asmjit/x86/x86assembler.h +685 -0
  136. data/ext/asmjit/asmjit/src/asmjit/x86/x86builder.cpp +52 -0
  137. data/ext/asmjit/asmjit/src/asmjit/x86/x86builder.h +351 -0
  138. data/ext/asmjit/asmjit/src/asmjit/x86/x86compiler.cpp +61 -0
  139. data/ext/asmjit/asmjit/src/asmjit/x86/x86compiler.h +721 -0
  140. data/ext/asmjit/asmjit/src/asmjit/x86/x86emithelper.cpp +619 -0
  141. data/ext/asmjit/asmjit/src/asmjit/x86/x86emithelper_p.h +60 -0
  142. data/ext/asmjit/asmjit/src/asmjit/x86/x86emitter.h +4315 -0
  143. data/ext/asmjit/asmjit/src/asmjit/x86/x86formatter.cpp +944 -0
  144. data/ext/asmjit/asmjit/src/asmjit/x86/x86formatter_p.h +58 -0
  145. data/ext/asmjit/asmjit/src/asmjit/x86/x86func.cpp +503 -0
  146. data/ext/asmjit/asmjit/src/asmjit/x86/x86func_p.h +33 -0
  147. data/ext/asmjit/asmjit/src/asmjit/x86/x86globals.h +2169 -0
  148. data/ext/asmjit/asmjit/src/asmjit/x86/x86instapi.cpp +1732 -0
  149. data/ext/asmjit/asmjit/src/asmjit/x86/x86instapi_p.h +41 -0
  150. data/ext/asmjit/asmjit/src/asmjit/x86/x86instdb.cpp +4427 -0
  151. data/ext/asmjit/asmjit/src/asmjit/x86/x86instdb.h +563 -0
  152. data/ext/asmjit/asmjit/src/asmjit/x86/x86instdb_p.h +311 -0
  153. data/ext/asmjit/asmjit/src/asmjit/x86/x86opcode_p.h +436 -0
  154. data/ext/asmjit/asmjit/src/asmjit/x86/x86operand.cpp +231 -0
  155. data/ext/asmjit/asmjit/src/asmjit/x86/x86operand.h +1085 -0
  156. data/ext/asmjit/asmjit/src/asmjit/x86/x86rapass.cpp +1509 -0
  157. data/ext/asmjit/asmjit/src/asmjit/x86/x86rapass_p.h +94 -0
  158. data/ext/asmjit/asmjit/src/asmjit/x86.h +93 -0
  159. data/ext/asmjit/asmjit/src/asmjit.natvis +245 -0
  160. data/ext/asmjit/asmjit/test/asmjit_test_assembler.cpp +84 -0
  161. data/ext/asmjit/asmjit/test/asmjit_test_assembler.h +85 -0
  162. data/ext/asmjit/asmjit/test/asmjit_test_assembler_a64.cpp +4006 -0
  163. data/ext/asmjit/asmjit/test/asmjit_test_assembler_x64.cpp +17833 -0
  164. data/ext/asmjit/asmjit/test/asmjit_test_assembler_x86.cpp +8300 -0
  165. data/ext/asmjit/asmjit/test/asmjit_test_compiler.cpp +253 -0
  166. data/ext/asmjit/asmjit/test/asmjit_test_compiler.h +73 -0
  167. data/ext/asmjit/asmjit/test/asmjit_test_compiler_a64.cpp +690 -0
  168. data/ext/asmjit/asmjit/test/asmjit_test_compiler_x86.cpp +4317 -0
  169. data/ext/asmjit/asmjit/test/asmjit_test_emitters.cpp +197 -0
  170. data/ext/asmjit/asmjit/test/asmjit_test_instinfo.cpp +181 -0
  171. data/ext/asmjit/asmjit/test/asmjit_test_misc.h +257 -0
  172. data/ext/asmjit/asmjit/test/asmjit_test_perf.cpp +62 -0
  173. data/ext/asmjit/asmjit/test/asmjit_test_perf.h +61 -0
  174. data/ext/asmjit/asmjit/test/asmjit_test_perf_a64.cpp +699 -0
  175. data/ext/asmjit/asmjit/test/asmjit_test_perf_x86.cpp +5032 -0
  176. data/ext/asmjit/asmjit/test/asmjit_test_unit.cpp +172 -0
  177. data/ext/asmjit/asmjit/test/asmjit_test_x86_sections.cpp +172 -0
  178. data/ext/asmjit/asmjit/test/asmjitutils.h +38 -0
  179. data/ext/asmjit/asmjit/test/broken.cpp +312 -0
  180. data/ext/asmjit/asmjit/test/broken.h +148 -0
  181. data/ext/asmjit/asmjit/test/cmdline.h +61 -0
  182. data/ext/asmjit/asmjit/test/performancetimer.h +41 -0
  183. data/ext/asmjit/asmjit/tools/configure-makefiles.sh +13 -0
  184. data/ext/asmjit/asmjit/tools/configure-ninja.sh +13 -0
  185. data/ext/asmjit/asmjit/tools/configure-sanitizers.sh +13 -0
  186. data/ext/asmjit/asmjit/tools/configure-vs2019-x64.bat +2 -0
  187. data/ext/asmjit/asmjit/tools/configure-vs2019-x86.bat +2 -0
  188. data/ext/asmjit/asmjit/tools/configure-vs2022-x64.bat +2 -0
  189. data/ext/asmjit/asmjit/tools/configure-vs2022-x86.bat +2 -0
  190. data/ext/asmjit/asmjit/tools/configure-xcode.sh +8 -0
  191. data/ext/asmjit/asmjit/tools/enumgen.js +417 -0
  192. data/ext/asmjit/asmjit/tools/enumgen.sh +3 -0
  193. data/ext/asmjit/asmjit/tools/tablegen-arm.js +365 -0
  194. data/ext/asmjit/asmjit/tools/tablegen-arm.sh +3 -0
  195. data/ext/asmjit/asmjit/tools/tablegen-x86.js +2638 -0
  196. data/ext/asmjit/asmjit/tools/tablegen-x86.sh +3 -0
  197. data/ext/asmjit/asmjit/tools/tablegen.js +947 -0
  198. data/ext/asmjit/asmjit/tools/tablegen.sh +4 -0
  199. data/ext/asmjit/asmjit.cc +18 -0
  200. data/lib/asmjit/version.rb +1 -1
  201. metadata +197 -2
@@ -0,0 +1,1162 @@
1
+ // This file is part of AsmJit project <https://asmjit.com>
2
+ //
3
+ // See asmjit.h or LICENSE.md for license and copyright information
4
+ // SPDX-License-Identifier: Zlib
5
+
6
+ #include "../core/api-build_p.h"
7
+ #include "../core/cpuinfo.h"
8
+ #include "../core/support.h"
9
+
10
+ #if !defined(_WIN32)
11
+ #include <errno.h>
12
+ #include <sys/utsname.h>
13
+ #include <unistd.h>
14
+ #endif
15
+
16
+ // Required by `getauxval()` on Linux.
17
+ #if defined(__linux__)
18
+ #include <sys/auxv.h>
19
+ #endif
20
+
21
+ //! Required to detect CPU and features on Apple platforms.
22
+ #if defined(__APPLE__)
23
+ #include <mach/machine.h>
24
+ #include <sys/types.h>
25
+ #include <sys/sysctl.h>
26
+ #endif
27
+
28
+ // Required by `__cpuidex()` and `_xgetbv()`.
29
+ #if defined(_MSC_VER)
30
+ #include <intrin.h>
31
+ #endif
32
+
33
+ ASMJIT_BEGIN_NAMESPACE
34
+
35
+ // CpuInfo - Detect - HW-Thread Count
36
+ // ==================================
37
+
38
// Returns the number of hardware threads reported by the operating system.
//
// Windows reports `SYSTEM_INFO::dwNumberOfProcessors`. POSIX systems that define `_SC_NPROCESSORS_ONLN` report
// the `sysconf()` result, falling back to 1 when the call fails or yields a non-positive value. Every other
// platform reports 1.
static inline uint32_t detectHWThreadCount() noexcept {
#if defined(_WIN32)
  SYSTEM_INFO sysInfo;
  ::GetSystemInfo(&sysInfo);
  return sysInfo.dwNumberOfProcessors;
#elif defined(_SC_NPROCESSORS_ONLN)
  const long onlineCount = ::sysconf(_SC_NPROCESSORS_ONLN);
  if (onlineCount > 0)
    return uint32_t(onlineCount);
  return uint32_t(1);
#else
  return 1;
#endif
}
54
+
55
+ // CpuInfo - Detect - X86
56
+ // ======================
57
+
58
+ #if ASMJIT_ARCH_X86
59
+
60
+ struct cpuid_t { uint32_t eax, ebx, ecx, edx; };
61
+ struct xgetbv_t { uint32_t eax, edx; };
62
+
63
+ // Executes `cpuid` instruction.
64
+ static inline void cpuidQuery(cpuid_t* out, uint32_t inEax, uint32_t inEcx = 0) noexcept {
65
+ #if defined(_MSC_VER)
66
+ __cpuidex(reinterpret_cast<int*>(out), inEax, inEcx);
67
+ #elif defined(__GNUC__) && ASMJIT_ARCH_X86 == 32
68
+ __asm__ __volatile__(
69
+ "mov %%ebx, %%edi\n"
70
+ "cpuid\n"
71
+ "xchg %%edi, %%ebx\n" : "=a"(out->eax), "=D"(out->ebx), "=c"(out->ecx), "=d"(out->edx) : "a"(inEax), "c"(inEcx));
72
+ #elif defined(__GNUC__) && ASMJIT_ARCH_X86 == 64
73
+ __asm__ __volatile__(
74
+ "mov %%rbx, %%rdi\n"
75
+ "cpuid\n"
76
+ "xchg %%rdi, %%rbx\n" : "=a"(out->eax), "=D"(out->ebx), "=c"(out->ecx), "=d"(out->edx) : "a"(inEax), "c"(inEcx));
77
+ #else
78
+ #error "[asmjit] x86::cpuidQuery() - Unsupported compiler."
79
+ #endif
80
+ }
81
+
82
+ // Executes 'xgetbv' instruction.
83
+ static inline void xgetbvQuery(xgetbv_t* out, uint32_t inEcx) noexcept {
84
+ #if defined(_MSC_VER)
85
+ uint64_t value = _xgetbv(inEcx);
86
+ out->eax = uint32_t(value & 0xFFFFFFFFu);
87
+ out->edx = uint32_t(value >> 32);
88
+ #elif defined(__GNUC__)
89
+ uint32_t outEax;
90
+ uint32_t outEdx;
91
+
92
+ // Replaced, because the world is not perfect:
93
+ // __asm__ __volatile__("xgetbv" : "=a"(outEax), "=d"(outEdx) : "c"(inEcx));
94
+ __asm__ __volatile__(".byte 0x0F, 0x01, 0xD0" : "=a"(outEax), "=d"(outEdx) : "c"(inEcx));
95
+
96
+ out->eax = outEax;
97
+ out->edx = outEdx;
98
+ #else
99
+ out->eax = 0;
100
+ out->edx = 0;
101
+ #endif
102
+ }
103
+
104
+ // Map a 12-byte vendor string returned by `cpuid` into a `CpuInfo::Vendor` ID.
105
+ static inline void simplifyCpuVendor(CpuInfo& cpu, uint32_t d0, uint32_t d1, uint32_t d2) noexcept {
106
+ struct Vendor {
107
+ char normalized[8];
108
+ union { char text[12]; uint32_t d[3]; };
109
+ };
110
+
111
+ static const Vendor table[] = {
112
+ { { 'A', 'M', 'D' }, {{ 'A', 'u', 't', 'h', 'e', 'n', 't', 'i', 'c', 'A', 'M', 'D' }} },
113
+ { { 'I', 'N', 'T', 'E', 'L' }, {{ 'G', 'e', 'n', 'u', 'i', 'n', 'e', 'I', 'n', 't', 'e', 'l' }} },
114
+ { { 'V', 'I', 'A' }, {{ 'C', 'e', 'n', 't', 'a', 'u', 'r', 'H', 'a', 'u', 'l', 's' }} },
115
+ { { 'V', 'I', 'A' }, {{ 'V', 'I', 'A', 0 , 'V', 'I', 'A', 0 , 'V', 'I', 'A', 0 }} },
116
+ { { 'U', 'N', 'K', 'N', 'O', 'W', 'N' }, {{ 0 }} }
117
+ };
118
+
119
+ uint32_t i;
120
+ for (i = 0; i < ASMJIT_ARRAY_SIZE(table) - 1; i++)
121
+ if (table[i].d[0] == d0 && table[i].d[1] == d1 && table[i].d[2] == d2)
122
+ break;
123
+ memcpy(cpu._vendor.str, table[i].normalized, 8);
124
+ }
125
+
126
+ static ASMJIT_FAVOR_SIZE void simplifyCpuBrand(char* s) noexcept {
127
+ char* d = s;
128
+
129
+ char c = s[0];
130
+ char prev = 0;
131
+
132
+ // Used to always clear the current character to ensure that the result
133
+ // doesn't contain garbage after a new null terminator is placed at the end.
134
+ s[0] = '\0';
135
+
136
+ for (;;) {
137
+ if (!c)
138
+ break;
139
+
140
+ if (!(c == ' ' && (prev == '@' || s[1] == ' ' || s[1] == '@'))) {
141
+ *d++ = c;
142
+ prev = c;
143
+ }
144
+
145
+ c = *++s;
146
+ s[0] = '\0';
147
+ }
148
+
149
+ d[0] = '\0';
150
+ }
151
+
152
+ static ASMJIT_FAVOR_SIZE void detectX86Cpu(CpuInfo& cpu) noexcept {
153
+ using Support::bitTest;
154
+
155
+ cpuid_t regs;
156
+ xgetbv_t xcr0 { 0, 0 };
157
+ CpuFeatures::X86& features = cpu.features().x86();
158
+
159
+ cpu._wasDetected = true;
160
+ cpu._maxLogicalProcessors = 1;
161
+
162
+ // We are gonna execute CPUID, which was introduced by I486, so it's the requirement.
163
+ features.add(CpuFeatures::X86::kI486);
164
+
165
+ // CPUID EAX=0
166
+ // -----------
167
+
168
+ // Get vendor string/id.
169
+ cpuidQuery(&regs, 0x0);
170
+
171
+ uint32_t maxId = regs.eax;
172
+ uint32_t maxSubLeafId_0x7 = 0;
173
+
174
+ simplifyCpuVendor(cpu, regs.ebx, regs.edx, regs.ecx);
175
+
176
+ // CPUID EAX=1
177
+ // -----------
178
+
179
+ if (maxId >= 0x1) {
180
+ // Get feature flags in ECX/EDX and family/model in EAX.
181
+ cpuidQuery(&regs, 0x1);
182
+
183
+ // Fill family and model fields.
184
+ uint32_t modelId = (regs.eax >> 4) & 0x0F;
185
+ uint32_t familyId = (regs.eax >> 8) & 0x0F;
186
+
187
+ // Use extended family and model fields.
188
+ if (familyId == 0x06u || familyId == 0x0Fu)
189
+ modelId += (((regs.eax >> 16) & 0x0Fu) << 4);
190
+
191
+ if (familyId == 0x0Fu)
192
+ familyId += ((regs.eax >> 20) & 0xFFu);
193
+
194
+ cpu._modelId = modelId;
195
+ cpu._familyId = familyId;
196
+ cpu._brandId = ((regs.ebx ) & 0xFF);
197
+ cpu._processorType = ((regs.eax >> 12) & 0x03);
198
+ cpu._maxLogicalProcessors = ((regs.ebx >> 16) & 0xFF);
199
+ cpu._stepping = ((regs.eax ) & 0x0F);
200
+ cpu._cacheLineSize = ((regs.ebx >> 8) & 0xFF) * 8;
201
+
202
+ features.addIf(bitTest(regs.ecx, 0), CpuFeatures::X86::kSSE3);
203
+ features.addIf(bitTest(regs.ecx, 1), CpuFeatures::X86::kPCLMULQDQ);
204
+ features.addIf(bitTest(regs.ecx, 3), CpuFeatures::X86::kMONITOR);
205
+ features.addIf(bitTest(regs.ecx, 5), CpuFeatures::X86::kVMX);
206
+ features.addIf(bitTest(regs.ecx, 6), CpuFeatures::X86::kSMX);
207
+ features.addIf(bitTest(regs.ecx, 9), CpuFeatures::X86::kSSSE3);
208
+ features.addIf(bitTest(regs.ecx, 13), CpuFeatures::X86::kCMPXCHG16B);
209
+ features.addIf(bitTest(regs.ecx, 19), CpuFeatures::X86::kSSE4_1);
210
+ features.addIf(bitTest(regs.ecx, 20), CpuFeatures::X86::kSSE4_2);
211
+ features.addIf(bitTest(regs.ecx, 22), CpuFeatures::X86::kMOVBE);
212
+ features.addIf(bitTest(regs.ecx, 23), CpuFeatures::X86::kPOPCNT);
213
+ features.addIf(bitTest(regs.ecx, 25), CpuFeatures::X86::kAESNI);
214
+ features.addIf(bitTest(regs.ecx, 26), CpuFeatures::X86::kXSAVE);
215
+ features.addIf(bitTest(regs.ecx, 27), CpuFeatures::X86::kOSXSAVE);
216
+ features.addIf(bitTest(regs.ecx, 30), CpuFeatures::X86::kRDRAND);
217
+ features.addIf(bitTest(regs.edx, 0), CpuFeatures::X86::kFPU);
218
+ features.addIf(bitTest(regs.edx, 4), CpuFeatures::X86::kRDTSC);
219
+ features.addIf(bitTest(regs.edx, 5), CpuFeatures::X86::kMSR);
220
+ features.addIf(bitTest(regs.edx, 8), CpuFeatures::X86::kCMPXCHG8B);
221
+ features.addIf(bitTest(regs.edx, 15), CpuFeatures::X86::kCMOV);
222
+ features.addIf(bitTest(regs.edx, 19), CpuFeatures::X86::kCLFLUSH);
223
+ features.addIf(bitTest(regs.edx, 23), CpuFeatures::X86::kMMX);
224
+ features.addIf(bitTest(regs.edx, 24), CpuFeatures::X86::kFXSR);
225
+ features.addIf(bitTest(regs.edx, 25), CpuFeatures::X86::kSSE);
226
+ features.addIf(bitTest(regs.edx, 25), CpuFeatures::X86::kSSE, CpuFeatures::X86::kSSE2);
227
+ features.addIf(bitTest(regs.edx, 28), CpuFeatures::X86::kMT);
228
+
229
+ // Get the content of XCR0 if supported by the CPU and enabled by the OS.
230
+ if (features.hasXSAVE() && features.hasOSXSAVE()) {
231
+ xgetbvQuery(&xcr0, 0);
232
+ }
233
+
234
+ // Detect AVX+.
235
+ if (bitTest(regs.ecx, 28)) {
236
+ // - XCR0[2:1] == 11b
237
+ // XMM & YMM states need to be enabled by OS.
238
+ if ((xcr0.eax & 0x00000006u) == 0x00000006u) {
239
+ features.add(CpuFeatures::X86::kAVX);
240
+ features.addIf(bitTest(regs.ecx, 12), CpuFeatures::X86::kFMA);
241
+ features.addIf(bitTest(regs.ecx, 29), CpuFeatures::X86::kF16C);
242
+ }
243
+ }
244
+ }
245
+
246
+ constexpr uint32_t kXCR0_AMX_Bits = 0x3u << 17;
247
+ bool amxEnabledByOS = (xcr0.eax & kXCR0_AMX_Bits) == kXCR0_AMX_Bits;
248
+
249
+ #if defined(__APPLE__)
250
+ // Apple platform provides on-demand AVX512 support. When an AVX512 instruction is used the first time it results
251
+ // in #UD, which would cause the thread being promoted to use AVX512 support by the OS in addition to enabling the
252
+ // necessary bits in XCR0 register.
253
+ bool avx512EnabledByOS = true;
254
+ #else
255
+ // - XCR0[2:1] == 11b - XMM/YMM states need to be enabled by OS.
256
+ // - XCR0[7:5] == 111b - Upper 256-bit of ZMM0-XMM15 and ZMM16-ZMM31 need to be enabled by OS.
257
+ constexpr uint32_t kXCR0_AVX512_Bits = (0x3u << 1) | (0x7u << 5);
258
+ bool avx512EnabledByOS = (xcr0.eax & kXCR0_AVX512_Bits) == kXCR0_AVX512_Bits;
259
+ #endif
260
+
261
+ // CPUID EAX=7 ECX=0
262
+ // -----------------
263
+
264
+ // Detect new features if the processor supports CPUID-07.
265
+ bool maybeMPX = false;
266
+
267
+ if (maxId >= 0x7) {
268
+ cpuidQuery(&regs, 0x7);
269
+
270
+ maybeMPX = bitTest(regs.ebx, 14);
271
+ maxSubLeafId_0x7 = regs.eax;
272
+
273
+ features.addIf(bitTest(regs.ebx, 0), CpuFeatures::X86::kFSGSBASE);
274
+ features.addIf(bitTest(regs.ebx, 3), CpuFeatures::X86::kBMI);
275
+ features.addIf(bitTest(regs.ebx, 4), CpuFeatures::X86::kHLE);
276
+ features.addIf(bitTest(regs.ebx, 7), CpuFeatures::X86::kSMEP);
277
+ features.addIf(bitTest(regs.ebx, 8), CpuFeatures::X86::kBMI2);
278
+ features.addIf(bitTest(regs.ebx, 9), CpuFeatures::X86::kERMS);
279
+ features.addIf(bitTest(regs.ebx, 11), CpuFeatures::X86::kRTM);
280
+ features.addIf(bitTest(regs.ebx, 18), CpuFeatures::X86::kRDSEED);
281
+ features.addIf(bitTest(regs.ebx, 19), CpuFeatures::X86::kADX);
282
+ features.addIf(bitTest(regs.ebx, 20), CpuFeatures::X86::kSMAP);
283
+ features.addIf(bitTest(regs.ebx, 23), CpuFeatures::X86::kCLFLUSHOPT);
284
+ features.addIf(bitTest(regs.ebx, 24), CpuFeatures::X86::kCLWB);
285
+ features.addIf(bitTest(regs.ebx, 29), CpuFeatures::X86::kSHA);
286
+ features.addIf(bitTest(regs.ecx, 0), CpuFeatures::X86::kPREFETCHWT1);
287
+ features.addIf(bitTest(regs.ecx, 4), CpuFeatures::X86::kOSPKE);
288
+ features.addIf(bitTest(regs.ecx, 5), CpuFeatures::X86::kWAITPKG);
289
+ features.addIf(bitTest(regs.ecx, 7), CpuFeatures::X86::kCET_SS);
290
+ features.addIf(bitTest(regs.ecx, 8), CpuFeatures::X86::kGFNI);
291
+ features.addIf(bitTest(regs.ecx, 9), CpuFeatures::X86::kVAES);
292
+ features.addIf(bitTest(regs.ecx, 10), CpuFeatures::X86::kVPCLMULQDQ);
293
+ features.addIf(bitTest(regs.ecx, 22), CpuFeatures::X86::kRDPID);
294
+ features.addIf(bitTest(regs.ecx, 25), CpuFeatures::X86::kCLDEMOTE);
295
+ features.addIf(bitTest(regs.ecx, 27), CpuFeatures::X86::kMOVDIRI);
296
+ features.addIf(bitTest(regs.ecx, 28), CpuFeatures::X86::kMOVDIR64B);
297
+ features.addIf(bitTest(regs.ecx, 29), CpuFeatures::X86::kENQCMD);
298
+ features.addIf(bitTest(regs.edx, 5), CpuFeatures::X86::kUINTR);
299
+ features.addIf(bitTest(regs.edx, 14), CpuFeatures::X86::kSERIALIZE);
300
+ features.addIf(bitTest(regs.edx, 16), CpuFeatures::X86::kTSXLDTRK);
301
+ features.addIf(bitTest(regs.edx, 18), CpuFeatures::X86::kPCONFIG);
302
+ features.addIf(bitTest(regs.edx, 20), CpuFeatures::X86::kCET_IBT);
303
+
304
+ // Detect 'TSX' - Requires at least one of `HLE` and `RTM` features.
305
+ if (features.hasHLE() || features.hasRTM())
306
+ features.add(CpuFeatures::X86::kTSX);
307
+
308
+ // Detect 'AVX2' - Requires AVX as well.
309
+ if (bitTest(regs.ebx, 5) && features.hasAVX())
310
+ features.add(CpuFeatures::X86::kAVX2);
311
+
312
+ // Detect 'AVX512'.
313
+ if (avx512EnabledByOS && bitTest(regs.ebx, 16)) {
314
+ features.add(CpuFeatures::X86::kAVX512_F);
315
+
316
+ features.addIf(bitTest(regs.ebx, 17), CpuFeatures::X86::kAVX512_DQ);
317
+ features.addIf(bitTest(regs.ebx, 21), CpuFeatures::X86::kAVX512_IFMA);
318
+ features.addIf(bitTest(regs.ebx, 26), CpuFeatures::X86::kAVX512_PFI);
319
+ features.addIf(bitTest(regs.ebx, 27), CpuFeatures::X86::kAVX512_ERI);
320
+ features.addIf(bitTest(regs.ebx, 28), CpuFeatures::X86::kAVX512_CDI);
321
+ features.addIf(bitTest(regs.ebx, 30), CpuFeatures::X86::kAVX512_BW);
322
+ features.addIf(bitTest(regs.ebx, 31), CpuFeatures::X86::kAVX512_VL);
323
+ features.addIf(bitTest(regs.ecx, 1), CpuFeatures::X86::kAVX512_VBMI);
324
+ features.addIf(bitTest(regs.ecx, 6), CpuFeatures::X86::kAVX512_VBMI2);
325
+ features.addIf(bitTest(regs.ecx, 11), CpuFeatures::X86::kAVX512_VNNI);
326
+ features.addIf(bitTest(regs.ecx, 12), CpuFeatures::X86::kAVX512_BITALG);
327
+ features.addIf(bitTest(regs.ecx, 14), CpuFeatures::X86::kAVX512_VPOPCNTDQ);
328
+ features.addIf(bitTest(regs.edx, 2), CpuFeatures::X86::kAVX512_4VNNIW);
329
+ features.addIf(bitTest(regs.edx, 3), CpuFeatures::X86::kAVX512_4FMAPS);
330
+ features.addIf(bitTest(regs.edx, 8), CpuFeatures::X86::kAVX512_VP2INTERSECT);
331
+ features.addIf(bitTest(regs.edx, 23), CpuFeatures::X86::kAVX512_FP16);
332
+ }
333
+
334
+ // Detect 'AMX'.
335
+ if (amxEnabledByOS) {
336
+ features.addIf(bitTest(regs.edx, 22), CpuFeatures::X86::kAMX_BF16);
337
+ features.addIf(bitTest(regs.edx, 24), CpuFeatures::X86::kAMX_TILE);
338
+ features.addIf(bitTest(regs.edx, 25), CpuFeatures::X86::kAMX_INT8);
339
+ }
340
+ }
341
+
342
+ // CPUID EAX=7 ECX=1
343
+ // -----------------
344
+
345
+ if (features.hasAVX512_F() && maxSubLeafId_0x7 >= 1) {
346
+ cpuidQuery(&regs, 0x7, 1);
347
+
348
+ features.addIf(bitTest(regs.eax, 3), CpuFeatures::X86::kAVX_VNNI);
349
+ features.addIf(bitTest(regs.eax, 5), CpuFeatures::X86::kAVX512_BF16);
350
+ features.addIf(bitTest(regs.eax, 22), CpuFeatures::X86::kHRESET);
351
+ }
352
+
353
+ // CPUID EAX=13 ECX=0
354
+ // ------------------
355
+
356
+ if (maxId >= 0xD) {
357
+ cpuidQuery(&regs, 0xD, 0);
358
+
359
+ // Both CPUID result and XCR0 has to be enabled to have support for MPX.
360
+ if (((regs.eax & xcr0.eax) & 0x00000018u) == 0x00000018u && maybeMPX)
361
+ features.add(CpuFeatures::X86::kMPX);
362
+
363
+ cpuidQuery(&regs, 0xD, 1);
364
+
365
+ features.addIf(bitTest(regs.eax, 0), CpuFeatures::X86::kXSAVEOPT);
366
+ features.addIf(bitTest(regs.eax, 1), CpuFeatures::X86::kXSAVEC);
367
+ features.addIf(bitTest(regs.eax, 3), CpuFeatures::X86::kXSAVES);
368
+ }
369
+
370
+ // CPUID EAX=14 ECX=0
371
+ // ------------------
372
+
373
+ if (maxId >= 0xE) {
374
+ cpuidQuery(&regs, 0xE, 0);
375
+
376
+ features.addIf(bitTest(regs.ebx, 4), CpuFeatures::X86::kPTWRITE);
377
+ }
378
+
379
+ // CPUID EAX=0x80000000...maxId
380
+ // ----------------------------
381
+
382
+ maxId = 0x80000000u;
383
+ uint32_t i = maxId;
384
+
385
+ // The highest EAX that we understand.
386
+ constexpr uint32_t kHighestProcessedEAX = 0x8000001Fu;
387
+
388
+ // Several CPUID calls are required to get the whole branc string. It's easier
389
+ // to copy one DWORD at a time instead of copying the string a byte by byte.
390
+ uint32_t* brand = cpu._brand.u32;
391
+ do {
392
+ cpuidQuery(&regs, i);
393
+ switch (i) {
394
+ case 0x80000000u:
395
+ maxId = Support::min<uint32_t>(regs.eax, kHighestProcessedEAX);
396
+ break;
397
+
398
+ case 0x80000001u:
399
+ features.addIf(bitTest(regs.ecx, 0), CpuFeatures::X86::kLAHFSAHF);
400
+ features.addIf(bitTest(regs.ecx, 2), CpuFeatures::X86::kSVM);
401
+ features.addIf(bitTest(regs.ecx, 5), CpuFeatures::X86::kLZCNT);
402
+ features.addIf(bitTest(regs.ecx, 6), CpuFeatures::X86::kSSE4A);
403
+ features.addIf(bitTest(regs.ecx, 7), CpuFeatures::X86::kMSSE);
404
+ features.addIf(bitTest(regs.ecx, 8), CpuFeatures::X86::kPREFETCHW);
405
+ features.addIf(bitTest(regs.ecx, 12), CpuFeatures::X86::kSKINIT);
406
+ features.addIf(bitTest(regs.ecx, 15), CpuFeatures::X86::kLWP);
407
+ features.addIf(bitTest(regs.ecx, 21), CpuFeatures::X86::kTBM);
408
+ features.addIf(bitTest(regs.ecx, 29), CpuFeatures::X86::kMONITORX);
409
+ features.addIf(bitTest(regs.edx, 20), CpuFeatures::X86::kNX);
410
+ features.addIf(bitTest(regs.edx, 21), CpuFeatures::X86::kFXSROPT);
411
+ features.addIf(bitTest(regs.edx, 22), CpuFeatures::X86::kMMX2);
412
+ features.addIf(bitTest(regs.edx, 27), CpuFeatures::X86::kRDTSCP);
413
+ features.addIf(bitTest(regs.edx, 29), CpuFeatures::X86::kPREFETCHW);
414
+ features.addIf(bitTest(regs.edx, 30), CpuFeatures::X86::k3DNOW2, CpuFeatures::X86::kMMX2);
415
+ features.addIf(bitTest(regs.edx, 31), CpuFeatures::X86::kPREFETCHW);
416
+
417
+ if (features.hasAVX()) {
418
+ features.addIf(bitTest(regs.ecx, 11), CpuFeatures::X86::kXOP);
419
+ features.addIf(bitTest(regs.ecx, 16), CpuFeatures::X86::kFMA4);
420
+ }
421
+
422
+ // This feature seems to be only supported by AMD.
423
+ if (cpu.isVendor("AMD")) {
424
+ features.addIf(bitTest(regs.ecx, 4), CpuFeatures::X86::kALTMOVCR8);
425
+ }
426
+ break;
427
+
428
+ case 0x80000002u:
429
+ case 0x80000003u:
430
+ case 0x80000004u:
431
+ *brand++ = regs.eax;
432
+ *brand++ = regs.ebx;
433
+ *brand++ = regs.ecx;
434
+ *brand++ = regs.edx;
435
+
436
+ // Go directly to the next one we are interested in.
437
+ if (i == 0x80000004u)
438
+ i = 0x80000008u - 1;
439
+ break;
440
+
441
+ case 0x80000008u:
442
+ features.addIf(bitTest(regs.ebx, 0), CpuFeatures::X86::kCLZERO);
443
+ features.addIf(bitTest(regs.ebx, 0), CpuFeatures::X86::kRDPRU);
444
+ features.addIf(bitTest(regs.ebx, 8), CpuFeatures::X86::kMCOMMIT);
445
+ features.addIf(bitTest(regs.ebx, 9), CpuFeatures::X86::kWBNOINVD);
446
+
447
+ // Go directly to the next one we are interested in.
448
+ i = 0x8000001Fu - 1;
449
+ break;
450
+
451
+ case 0x8000001Fu:
452
+ features.addIf(bitTest(regs.eax, 4), CpuFeatures::X86::kSNP);
453
+ break;
454
+ }
455
+ } while (++i <= maxId);
456
+
457
+ // Simplify CPU brand string a bit by removing some unnecessary spaces.
458
+ simplifyCpuBrand(cpu._brand.str);
459
+ }
460
+
461
+ #endif // ASMJIT_ARCH_X86
462
+
463
+ // CpuInfo - Detect - ARM
464
+ // ======================
465
+
466
+ // The most relevant and accurate information can be found here:
467
+ // https://github.com/llvm-project/llvm/blob/master/lib/Target/AArch64/AArch64.td
468
+ // https://github.com/apple/llvm-project/blob/apple/main/llvm/lib/Target/AArch64/AArch64.td (Apple fork)
469
+ //
470
+ // Other resources:
471
+ // https://en.wikipedia.org/wiki/AArch64
472
+ // https://en.wikipedia.org/wiki/Apple_silicon#List_of_Apple_processors
473
+ // https://developer.arm.com/architectures/learn-the-architecture/understanding-the-armv8-x-extensions/single-page
474
+
475
+ #if ASMJIT_ARCH_ARM
476
+
477
+ static inline void populateBaseARMFeatures(CpuInfo& cpu) noexcept {
478
+ #if ASMJIT_ARCH_ARM == 32
479
+ // No baseline flags at the moment.
480
+ DebugUtils::unused(cpu);
481
+ #else
482
+ // AArch64 is based on ARMv8-A and later.
483
+ cpu.addFeature(CpuFeatures::ARM::kARMv6);
484
+ cpu.addFeature(CpuFeatures::ARM::kARMv7);
485
+ cpu.addFeature(CpuFeatures::ARM::kARMv8a);
486
+
487
+ // AArch64 comes with these features by default.
488
+ cpu.addFeature(CpuFeatures::ARM::kVFPv2);
489
+ cpu.addFeature(CpuFeatures::ARM::kVFPv3);
490
+ cpu.addFeature(CpuFeatures::ARM::kVFPv4);
491
+ cpu.addFeature(CpuFeatures::ARM::kASIMD);
492
+ cpu.addFeature(CpuFeatures::ARM::kIDIVA);
493
+ #endif
494
+ }
495
+
496
+ // Detects ARM version by macros defined at compile time. This means that AsmJit will report features forced at
497
+ // compile time that should always be provided by the target CPU. This also means that if we don't provide any
498
+ // means to detect CPU features the features reported by AsmJit will at least not report less features than the
499
+ // target it was compiled to.
500
+ ASMJIT_MAYBE_UNUSED
501
+ static ASMJIT_FAVOR_SIZE void detectARMFeaturesViaCompilerFlags(CpuInfo& cpu) noexcept {
502
+ DebugUtils::unused(cpu);
503
+
504
+ #if ASMJIT_ARCH_ARM == 32
505
+
506
+ // ARM targets have no baseline at the moment.
507
+ # if defined(__ARM_ARCH_7A__)
508
+ cpu.addFeature(CpuFeatures::ARM::kARMv7);
509
+ # endif
510
+ # if defined(__ARM_ARCH_8A__)
511
+ cpu.addFeature(CpuFeatures::ARM::kARMv8a);
512
+ # endif
513
+
514
+ # if defined(__TARGET_ARCH_THUMB)
515
+ cpu.addFeature(CpuFeatures::ARM::kTHUMB);
516
+ # if __TARGET_ARCH_THUMB >= 4
517
+ cpu.addFeature(CpuFeatures::ARM::kTHUMBv2);
518
+ # endif
519
+ # endif
520
+
521
+ # if defined(__ARM_FEATURE_FMA)
522
+ cpu.addFeature(CpuFeatures::ARM::kVFPv3);
523
+ cpu.addFeature(CpuFeatures::ARM::kVFPv4);
524
+ # endif
525
+
526
+ # if defined(__ARM_NEON)
527
+ cpu.addFeature(CpuFeatures::ARM::kASIMD);
528
+ # endif
529
+
530
+ # if defined(__ARM_FEATURE_IDIV) && defined(__TARGET_ARCH_THUMB)
531
+ cpu.addFeature(CpuFeatures::ARM::kIDIVT);
532
+ #endif
533
+ # if defined(__ARM_FEATURE_IDIV) && !defined(__TARGET_ARCH_THUMB)
534
+ cpu.addFeature(CpuFeatures::ARM::kIDIVA);
535
+ # endif
536
+
537
+ #endif
538
+
539
+ #if defined(__ARM_ARCH_8_1A__)
540
+ cpu.addFeature(CpuFeatures::ARM::kARMv8_1a);
541
+ #endif
542
+ #if defined(__ARM_ARCH_8_2A__)
543
+ cpu.addFeature(CpuFeatures::ARM::kARMv8_2a);
544
+ #endif
545
+ #if defined(__ARM_ARCH_8_3A__)
546
+ cpu.addFeature(CpuFeatures::ARM::kARMv8_3a);
547
+ #endif
548
+ #if defined(__ARM_ARCH_8_4A__)
549
+ cpu.addFeature(CpuFeatures::ARM::kARMv8_4a);
550
+ #endif
551
+ #if defined(__ARM_ARCH_8_5A__)
552
+ cpu.addFeature(CpuFeatures::ARM::kARMv8_5a);
553
+ #endif
554
+ #if defined(__ARM_ARCH_8_6A__)
555
+ cpu.addFeature(CpuFeatures::ARM::kARMv8_6a);
556
+ #endif
557
+ #if defined(__ARM_ARCH_8_7A__)
558
+ cpu.addFeature(CpuFeatures::ARM::kARMv8_7a);
559
+ #endif
560
+
561
+ #if defined(__ARM_FEATURE_AES)
562
+ cpu.addFeature(CpuFeatures::ARM::kAES);
563
+ #endif
564
+
565
+ #if defined(__ARM_FEATURE_BF16_SCALAR_ARITHMETIC) && defined(__ARM_FEATURE_BF16_VECTOR_ARITHMETIC)
566
+ cpu.addFeature(CpuFeatures::ARM::kBF16);
567
+ #endif
568
+
569
+ #if defined(__ARM_FEATURE_CRC32)
570
+ cpu.addFeature(CpuFeatures::ARM::kCRC32);
571
+ #endif
572
+
573
+ #if defined(__ARM_FEATURE_CRYPTO)
574
+ cpu.addFeature(CpuFeatures::ARM::kAES,
575
+ CpuFeatures::ARM::kSHA1,
576
+ CpuFeatures::ARM::kSHA2);
577
+ #endif
578
+
579
+ #if defined(__ARM_FEATURE_DOTPROD)
580
+ cpu.addFeature(CpuFeatures::ARM::kDOTPROD);
581
+ #endif
582
+
583
+ #if defined(__ARM_FEATURE_FP16FML) || defined(__ARM_FEATURE_FP16_FML)
584
+ cpu.addFeature(CpuFeatures::ARM::kFP16FML);
585
+ #endif
586
+
587
+ #if defined(__ARM_FEATURE_FP16_SCALAR_ARITHMETIC)
588
+ cpu.addFeature(CpuFeatures::ARM::kFP16FULL);
589
+ #endif
590
+
591
+ #if defined(__ARM_FEATURE_FRINT)
592
+ cpu.addFeature(CpuFeatures::ARM::kFRINT);
593
+ #endif
594
+
595
+ #if defined(__ARM_FEATURE_JCVT)
596
+ cpu.addFeature(CpuFeatures::ARM::kFJCVTZS);
597
+ #endif
598
+
599
+ #if defined(__ARM_FEATURE_MATMUL_INT8)
600
+ cpu.addFeature(CpuFeatures::ARM::kI8MM);
601
+ #endif
602
+
603
+ #if defined(__ARM_FEATURE_ATOMICS)
604
+ cpu.addFeature(CpuFeatures::ARM::kLSE);
605
+ #endif
606
+
607
+ #if defined(__ARM_FEATURE_MEMORY_TAGGING)
608
+ cpu.addFeature(CpuFeatures::ARM::kMTE);
609
+ #endif
610
+
611
+ #if defined(__ARM_FEATURE_QRDMX)
612
+ cpu.addFeature(CpuFeatures::ARM::kRDM);
613
+ #endif
614
+
615
+ #if defined(__ARM_FEATURE_RNG)
616
+ cpu.addFeature(CpuFeatures::ARM::kRNG);
617
+ #endif
618
+
619
+ #if defined(__ARM_FEATURE_SHA2)
620
+ cpu.addFeature(CpuFeatures::ARM::kSHA2);
621
+ #endif
622
+
623
+ #if defined(__ARM_FEATURE_SHA3)
624
+ cpu.addFeature(CpuFeatures::ARM::kSHA3);
625
+ #endif
626
+
627
+ #if defined(__ARM_FEATURE_SHA512)
628
+ cpu.addFeature(CpuFeatures::ARM::kSHA512);
629
+ #endif
630
+
631
+ #if defined(__ARM_FEATURE_SM3)
632
+ cpu.addFeature(CpuFeatures::ARM::kSM3);
633
+ #endif
634
+
635
+ #if defined(__ARM_FEATURE_SM4)
636
+ cpu.addFeature(CpuFeatures::ARM::kSM4);
637
+ #endif
638
+
639
+ #if defined(__ARM_FEATURE_SVE) || defined(__ARM_FEATURE_SVE_VECTOR_OPERATORS)
640
+ cpu.addFeature(CpuFeatures::ARM::kSVE);
641
+ #endif
642
+
643
+ #if defined(__ARM_FEATURE_SVE_MATMUL_INT8)
644
+ cpu.addFeature(CpuFeatures::ARM::kSVE_I8MM);
645
+ #endif
646
+
647
+ #if defined(__ARM_FEATURE_SVE_MATMUL_FP32)
648
+ cpu.addFeature(CpuFeatures::ARM::kSVE_F32MM);
649
+ #endif
650
+
651
+ #if defined(__ARM_FEATURE_SVE_MATMUL_FP64)
652
+ cpu.addFeature(CpuFeatures::ARM::kSVE_F64MM);
653
+ #endif
654
+
655
+ #if defined(__ARM_FEATURE_SVE2)
656
+ cpu.addFeature(CpuFeatures::ARM::kSVE2);
657
+ #endif
658
+
659
+ #if defined(__ARM_FEATURE_SVE2_AES)
660
+ cpu.addFeature(CpuFeatures::ARM::kSVE2_AES);
661
+ #endif
662
+
663
+ #if defined(__ARM_FEATURE_SVE2_BITPERM)
664
+ cpu.addFeature(CpuFeatures::ARM::kSVE2_BITPERM);
665
+ #endif
666
+
667
+ #if defined(__ARM_FEATURE_SVE2_SHA3)
668
+ cpu.addFeature(CpuFeatures::ARM::kSVE2_SHA3);
669
+ #endif
670
+
671
+ #if defined(__ARM_FEATURE_SVE2_SM4)
672
+ cpu.addFeature(CpuFeatures::ARM::kSVE2_SM4);
673
+ #endif
674
+
675
+ #if defined(__ARM_FEATURE_TME)
676
+ cpu.addFeature(CpuFeatures::ARM::kTME);
677
+ #endif
678
+ }
679
+
680
+ ASMJIT_MAYBE_UNUSED
681
+ static ASMJIT_FAVOR_SIZE void expandARMFeaturesByVersion(CpuInfo& cpu) noexcept {
682
+ CpuFeatures::ARM& features = cpu.features().arm();
683
+
684
+ if (features.hasARMv8_7a()) {
685
+ features.add(CpuFeatures::ARM::kARMv8_6a);
686
+ }
687
+
688
+ if (features.hasARMv8_6a()) {
689
+ features.add(CpuFeatures::ARM::kARMv8_5a,
690
+ CpuFeatures::ARM::kBF16);
691
+
692
+ if (features.hasSVE())
693
+ features.add(CpuFeatures::ARM::kSVE_I8MM);
694
+ }
695
+
696
+ if (features.hasARMv8_5a()) {
697
+ features.add(CpuFeatures::ARM::kARMv8_4a,
698
+ CpuFeatures::ARM::kALTNZCV,
699
+ CpuFeatures::ARM::kBTI,
700
+ CpuFeatures::ARM::kFRINT,
701
+ CpuFeatures::ARM::kSB,
702
+ CpuFeatures::ARM::kSSBS);
703
+ }
704
+
705
+ if (features.hasARMv8_4a()) {
706
+ features.add(CpuFeatures::ARM::kARMv8_3a,
707
+ CpuFeatures::ARM::kDIT,
708
+ CpuFeatures::ARM::kDOTPROD,
709
+ CpuFeatures::ARM::kFLAGM,
710
+ CpuFeatures::ARM::kPMU,
711
+ CpuFeatures::ARM::kRCPC_IMMO);
712
+ }
713
+
714
+ if (features.hasARMv8_3a()) {
715
+ features.add(CpuFeatures::ARM::kARMv8_2a,
716
+ CpuFeatures::ARM::kFCMA,
717
+ CpuFeatures::ARM::kFJCVTZS);
718
+ }
719
+
720
+ if (features.hasARMv8_2a()) {
721
+ features.add(CpuFeatures::ARM::kARMv8_1a);
722
+ }
723
+
724
+ if (features.hasARMv8_1a()) {
725
+ features.add(CpuFeatures::ARM::kARMv8a,
726
+ CpuFeatures::ARM::kCRC32,
727
+ CpuFeatures::ARM::kLSE,
728
+ CpuFeatures::ARM::kRDM);
729
+ }
730
+
731
+ if (features.hasARMv8a()) {
732
+ features.add(CpuFeatures::ARM::kARMv7,
733
+ CpuFeatures::ARM::kVFPv2,
734
+ CpuFeatures::ARM::kVFPv3,
735
+ CpuFeatures::ARM::kVFPv4,
736
+ CpuFeatures::ARM::kVFP_D32,
737
+ CpuFeatures::ARM::kASIMD,
738
+ CpuFeatures::ARM::kIDIVA);
739
+ }
740
+ }
741
+
742
+ // CpuInfo - Detect - ARM [Windows]
743
+ // ================================
744
+
745
+ #if defined(_WIN32)
746
+ struct WinPFPMapping {
747
+ uint8_t featureId;
748
+ uint8_t pfpFeatureId;
749
+ };
750
+
751
+ static ASMJIT_FAVOR_SIZE void detectPFPFeatures(CpuInfo& cpu, const WinPFPMapping* mapping, size_t size) noexcept {
752
+ for (size_t i = 0; i < size; i++)
753
+ if (::IsProcessorFeaturePresent(mapping[i].pfpFeatureId))
754
+ cpu.addFeature(mapping[i].featureId);
755
+ }
756
+
757
+ //! Detect ARM CPU features on Windows.
758
+ //!
759
+ //! The detection is based on `IsProcessorFeaturePresent()` API call.
760
+ static ASMJIT_FAVOR_SIZE void detectARMCpu(CpuInfo& cpu) noexcept {
761
+ cpu._wasDetected = true;
762
+ populateBaseARMFeatures(cpu);
763
+
764
+ CpuFeatures::ARM& features = cpu.features().arm();
765
+
766
+ // Win32 for ARM requires ARMv7 with DSP extensions, VFPv3, and uses THUMBv2 by default.
767
+ #if ASMJIT_ARCH_ARM == 32
768
+ features.add(CpuFeatures::ARM::kTHUMB);
769
+ features.add(CpuFeatures::ARM::kTHUMBv2);
770
+ features.add(CpuFeatures::ARM::kARMv6);
771
+ features.add(CpuFeatures::ARM::kARMv7);
772
+ features.add(CpuFeatures::ARM::kEDSP);
773
+ features.add(CpuFeatures::ARM::kVFPv2);
774
+ features.add(CpuFeatures::ARM::kVFPv3);
775
+ #endif
776
+
777
+ // Windows for ARM requires ASIMD.
778
+ features.add(CpuFeatures::ARM::kASIMD);
779
+
780
+ // Detect additional CPU features by calling `IsProcessorFeaturePresent()`.
781
+ static const WinPFPMapping mapping[] = {
782
+ #if ASMJIT_ARCH_ARM == 32
783
+ { uint8_t(CpuFeatures::ARM::kVFP_D32) , 18 }, // PF_ARM_VFP_32_REGISTERS_AVAILABLE
784
+ { uint8_t(CpuFeatures::ARM::kIDIVT) , 24 }, // PF_ARM_DIVIDE_INSTRUCTION_AVAILABLE
785
+ { uint8_t(CpuFeatures::ARM::kVFPv4) , 27 }, // PF_ARM_FMAC_INSTRUCTIONS_AVAILABLE
786
+ { uint8_t(CpuFeatures::ARM::kARMv8a) , 29 }, // PF_ARM_V8_INSTRUCTIONS_AVAILABLE
787
+ #endif
788
+ { uint8_t(CpuFeatures::ARM::kAES) , 30 }, // PF_ARM_V8_CRYPTO_INSTRUCTIONS_AVAILABLE
789
+ { uint8_t(CpuFeatures::ARM::kCRC32) , 31 }, // PF_ARM_V8_CRC32_INSTRUCTIONS_AVAILABLE
790
+ { uint8_t(CpuFeatures::ARM::kLSE) , 34 } // PF_ARM_V81_ATOMIC_INSTRUCTIONS_AVAILABLE
791
+
792
+ };
793
+ detectPFPFeatures(cpu, mapping, ASMJIT_ARRAY_SIZE(mapping));
794
+
795
+ // Windows provides several instructions under a single flag:
796
+ if (features.hasAES()) {
797
+ features.add(CpuFeatures::ARM::kSHA1,
798
+ CpuFeatures::ARM::kSHA2);
799
+ }
800
+
801
+ expandARMFeaturesByVersion(cpu);
802
+ }
803
+
804
+ // CpuInfo - Detect - ARM [Linux]
805
+ // ==============================
806
+
807
+ #elif defined(__linux__)
808
+
809
+ struct LinuxHWCapMapping {
810
+ uint8_t featureId;
811
+ uint8_t hwCapBit;
812
+ };
813
+
814
+ static ASMJIT_FAVOR_SIZE void detectHWCaps(CpuInfo& cpu, unsigned long type, const LinuxHWCapMapping* mapping, size_t size) noexcept {
815
+ unsigned long mask = getauxval(type);
816
+ for (size_t i = 0; i < size; i++)
817
+ cpu.features().addIf(Support::bitTest(mask, mapping[i].hwCapBit), mapping[i].featureId);
818
+ }
819
+
820
+ #if ASMJIT_ARCH_ARM == 32
821
+
822
+ // `AT_HWCAP` provides ARMv7 (and less) related flags.
823
+ static const LinuxHWCapMapping hwCapMapping[] = {
824
+ { uint8_t(CpuFeatures::ARM::kVFPv2) , 6 }, // HWCAP_VFP
825
+ { uint8_t(CpuFeatures::ARM::kEDSP) , 7 }, // HWCAP_EDSP
826
+ { uint8_t(CpuFeatures::ARM::kASIMD) , 12 }, // HWCAP_NEON
827
+ { uint8_t(CpuFeatures::ARM::kVFPv3) , 13 }, // HWCAP_VFPv3
828
+ { uint8_t(CpuFeatures::ARM::kVFPv4) , 16 }, // HWCAP_VFPv4
829
+ { uint8_t(CpuFeatures::ARM::kIDIVA) , 17 }, // HWCAP_IDIVA
830
+ { uint8_t(CpuFeatures::ARM::kIDIVT) , 18 }, // HWCAP_IDIVT
831
+ { uint8_t(CpuFeatures::ARM::kVFP_D32) , 19 } // HWCAP_VFPD32
832
+ };
833
+
834
+ // `AT_HWCAP2` provides ARMv8+ related flags.
835
+ static const LinuxHWCapMapping hwCap2Mapping[] = {
836
+ { uint8_t(CpuFeatures::ARM::kAES) , 0 }, // HWCAP2_AES
837
+ { uint8_t(CpuFeatures::ARM::kPMULL) , 1 }, // HWCAP2_PMULL
838
+ { uint8_t(CpuFeatures::ARM::kSHA1) , 2 }, // HWCAP2_SHA1
839
+ { uint8_t(CpuFeatures::ARM::kSHA2) , 3 }, // HWCAP2_SHA2
840
+ { uint8_t(CpuFeatures::ARM::kCRC32) , 4 } // HWCAP2_CRC32
841
+ };
842
+
843
+ static ASMJIT_FAVOR_SIZE void detectARMCpu(CpuInfo& cpu) noexcept {
844
+ cpu._wasDetected = true;
845
+
846
+ populateBaseARMFeatures(cpu);
847
+
848
+ CpuFeatures::ARM& features = cpu.features().arm();
849
+
850
+ detectHWCaps(cpu, AT_HWCAP, hwCapMapping, ASMJIT_ARRAY_SIZE(hwCapMapping));
851
+ detectHWCaps(cpu, AT_HWCAP2, hwCap2Mapping, ASMJIT_ARRAY_SIZE(hwCap2Mapping));
852
+
853
+ // VFPv3 implies VFPv2.
854
+ if (features.hasVFPv3())
855
+ features.add(CpuFeatures::ARM::kVFPv2);
856
+
857
+ // VFPv2 implies ARMv6.
858
+ if (features.hasVFPv2())
859
+ features.add(CpuFeatures::ARM::kARMv6);
860
+
861
+ // ARMv7 provides VFPv3|ASIMD.
862
+ if (features.hasVFPv3() || features.hasASIMD())
863
+ features.add(CpuFeatures::ARM::kARMv7);
864
+
865
+ // ARMv8 provives AES, CRC32, PMULL, SHA1, and SHA2.
866
+ if (features.hasAES() || features.hasCRC32() || features.hasPMULL() || features.hasSHA1() || features.hasSHA2())
867
+ features.add(CpuFeatures::ARM::kARMv8a);
868
+ }
869
+
870
+ #else
871
+
872
+ // `AT_HWCAP` provides ARMv8+ related flags.
873
+ static const LinuxHWCapMapping hwCapMapping[] = {
874
+ /*
875
+ { uint8_t(CpuFeatures::ARM::k) , 0 }, // HWCAP_FP
876
+ */
877
+ { uint8_t(CpuFeatures::ARM::kASIMD) , 1 }, // HWCAP_ASIMD
878
+ /*
879
+ { uint8_t(CpuFeatures::ARM::k) , 2 }, // HWCAP_EVTSTRM
880
+ */
881
+ { uint8_t(CpuFeatures::ARM::kAES) , 3 }, // HWCAP_AES
882
+ { uint8_t(CpuFeatures::ARM::kPMULL) , 4 }, // HWCAP_PMULL
883
+ { uint8_t(CpuFeatures::ARM::kSHA1) , 5 }, // HWCAP_SHA1
884
+ { uint8_t(CpuFeatures::ARM::kSHA2) , 6 }, // HWCAP_SHA2
885
+ { uint8_t(CpuFeatures::ARM::kCRC32) , 7 }, // HWCAP_CRC32
886
+ { uint8_t(CpuFeatures::ARM::kLSE) , 8 }, // HWCAP_ATOMICS
887
+ { uint8_t(CpuFeatures::ARM::kFP16CONV) , 9 }, // HWCAP_FPHP
888
+ { uint8_t(CpuFeatures::ARM::kFP16FULL) , 10 }, // HWCAP_ASIMDHP
889
+ { uint8_t(CpuFeatures::ARM::kCPUID) , 11 }, // HWCAP_CPUID
890
+ { uint8_t(CpuFeatures::ARM::kRDM) , 12 }, // HWCAP_ASIMDRDM
891
+ { uint8_t(CpuFeatures::ARM::kFJCVTZS) , 13 }, // HWCAP_JSCVT
892
+ { uint8_t(CpuFeatures::ARM::kFCMA) , 14 }, // HWCAP_FCMA
893
+ /*
894
+ { uint8_t(CpuFeatures::ARM::k) , 15 }, // HWCAP_LRCPC
895
+ { uint8_t(CpuFeatures::ARM::k) , 16 }, // HWCAP_DCPOP
896
+ */
897
+ { uint8_t(CpuFeatures::ARM::kSHA3) , 17 }, // HWCAP_SHA3
898
+ { uint8_t(CpuFeatures::ARM::kSM3) , 18 }, // HWCAP_SM3
899
+ { uint8_t(CpuFeatures::ARM::kSM4) , 19 }, // HWCAP_SM4
900
+ { uint8_t(CpuFeatures::ARM::kDOTPROD) , 20 }, // HWCAP_ASIMDDP
901
+ { uint8_t(CpuFeatures::ARM::kSHA512) , 21 }, // HWCAP_SHA512
902
+ { uint8_t(CpuFeatures::ARM::kSVE) , 22 }, // HWCAP_SVE
903
+ { uint8_t(CpuFeatures::ARM::kFP16FML) , 23 }, // HWCAP_ASIMDFHM
904
+ { uint8_t(CpuFeatures::ARM::kDIT) , 24 }, // HWCAP_DIT
905
+ /*
906
+ { uint8_t(CpuFeatures::ARM::k) , 25 }, // HWCAP_USCAT
907
+ { uint8_t(CpuFeatures::ARM::k) , 26 }, // HWCAP_ILRCPC
908
+ */
909
+ { uint8_t(CpuFeatures::ARM::kFLAGM) , 27 }, // HWCAP_FLAGM
910
+ { uint8_t(CpuFeatures::ARM::kSSBS) , 28 }, // HWCAP_SSBS
911
+ { uint8_t(CpuFeatures::ARM::kSB) , 29 } // HWCAP_SB
912
+ /*
913
+ { uint8_t(CpuFeatures::ARM::k) , 30 }, // HWCAP_PACA
914
+ { uint8_t(CpuFeatures::ARM::k) , 31 } // HWCAP_PACG
915
+ */
916
+ };
917
+
918
+ // `AT_HWCAP2` provides ARMv8+ related flags.
919
+ static const LinuxHWCapMapping hwCapMapping2[] = {
920
+ /*
921
+ { uint8_t(CpuFeatures::ARM::k) , 0 }, // HWCAP2_DCPODP
922
+ */
923
+ { uint8_t(CpuFeatures::ARM::kSVE2) , 1 }, // HWCAP2_SVE2
924
+ { uint8_t(CpuFeatures::ARM::kSVE2_AES) , 2 }, // HWCAP2_SVEAES
925
+ { uint8_t(CpuFeatures::ARM::kSVE_PMULL) , 3 }, // HWCAP2_SVEPMULL
926
+ { uint8_t(CpuFeatures::ARM::kSVE2_BITPERM), 4 }, // HWCAP2_SVEBITPERM
927
+ { uint8_t(CpuFeatures::ARM::kSVE2_SHA3) , 5 }, // HWCAP2_SVESHA3
928
+ { uint8_t(CpuFeatures::ARM::kSVE2_SM4) , 6 }, // HWCAP2_SVESM4
929
+ { uint8_t(CpuFeatures::ARM::kALTNZCV) , 7 }, // HWCAP2_FLAGM2
930
+ { uint8_t(CpuFeatures::ARM::kFRINT) , 8 }, // HWCAP2_FRINT
931
+ { uint8_t(CpuFeatures::ARM::kSVE_I8MM) , 9 }, // HWCAP2_SVEI8MM
932
+ { uint8_t(CpuFeatures::ARM::kSVE_F32MM) , 10 }, // HWCAP2_SVEF32MM
933
+ { uint8_t(CpuFeatures::ARM::kSVE_F64MM) , 11 }, // HWCAP2_SVEF64MM
934
+ { uint8_t(CpuFeatures::ARM::kSVE_BF16) , 12 }, // HWCAP2_SVEBF16
935
+ { uint8_t(CpuFeatures::ARM::kI8MM) , 13 }, // HWCAP2_I8MM
936
+ { uint8_t(CpuFeatures::ARM::kBF16) , 14 }, // HWCAP2_BF16
937
+ { uint8_t(CpuFeatures::ARM::kDGH) , 15 }, // HWCAP2_DGH
938
+ { uint8_t(CpuFeatures::ARM::kRNG) , 16 }, // HWCAP2_RNG
939
+ { uint8_t(CpuFeatures::ARM::kBTI) , 17 }, // HWCAP2_BTI
940
+ { uint8_t(CpuFeatures::ARM::kMTE) , 18 } // HWCAP2_MTE
941
+ };
942
+
943
+ static ASMJIT_FAVOR_SIZE void detectARMCpu(CpuInfo& cpu) noexcept {
944
+ cpu._wasDetected = true;
945
+ populateBaseARMFeatures(cpu);
946
+
947
+ detectHWCaps(cpu, AT_HWCAP, hwCapMapping, ASMJIT_ARRAY_SIZE(hwCapMapping));
948
+ detectHWCaps(cpu, AT_HWCAP2, hwCapMapping2, ASMJIT_ARRAY_SIZE(hwCapMapping2));
949
+ }
950
+
951
+ #endif
952
+
953
+ // CpuInfo - Detect - ARM [Apple]
954
+ // ==============================
955
+
956
+ #elif defined(__APPLE__)
957
+
958
// CPU family identifiers that are compared against the value of the "hw.cpufamily" sysctl.
namespace AppleHWId {

enum CpuFamily : uint32_t {
  // Generic ARM.
  kCpuFamily_ARM_9              = 0xE73283AEu,
  kCpuFamily_ARM_11             = 0x8FF620D8u,
  kCpuFamily_ARM_12             = 0xBD1B0AE9u,
  kCpuFamily_ARM_13             = 0x0CC90E64u,
  kCpuFamily_ARM_14             = 0x96077EF1u,
  kCpuFamily_ARM_15             = 0xA8511BCAu,

  // Apple design.
  kCpuFamily_SWIFT              = 0x1E2D6381u,
  kCpuFamily_CYCLONE            = 0x37A09642u,
  kCpuFamily_TYPHOON            = 0x2C91A47Eu,
  kCpuFamily_TWISTER            = 0x92FB37C8u,
  kCpuFamily_HURRICANE          = 0x67CEEE93u,
  kCpuFamily_MONSOON_MISTRAL    = 0xE81E7EF6u,
  kCpuFamily_VORTEX_TEMPEST     = 0x07D34B9Fu,
  kCpuFamily_LIGHTNING_THUNDER  = 0x462504D2u,
  kCpuFamily_FIRESTORM_ICESTORM = 0x1B588BB3u
};

} // {AppleHWId}
980
+
981
+ static ASMJIT_FAVOR_SIZE uint32_t queryARMCpuFamilyId() noexcept {
982
+ uint32_t result = 0;
983
+ size_t size = sizeof(result);
984
+
985
+ int res = sysctlbyname("hw.cpufamily", &result, &size, nullptr, 0);
986
+ if (res != 0)
987
+ return 0;
988
+ else
989
+ return result;
990
+ }
991
+
992
+ static ASMJIT_FAVOR_SIZE void detectARMCpu(CpuInfo& cpu) noexcept {
993
+ cpu._wasDetected = true;
994
+ populateBaseARMFeatures(cpu);
995
+
996
+ uint32_t cpuFamilyId = queryARMCpuFamilyId();
997
+ CpuFeatures::ARM& features = cpu.features().arm();
998
+
999
+ switch (cpuFamilyId) {
1000
+ case AppleHWId::kCpuFamily_ARM_9:
1001
+ case AppleHWId::kCpuFamily_ARM_11:
1002
+ case AppleHWId::kCpuFamily_ARM_12:
1003
+ break;
1004
+
1005
+ // ARM Cortex A8.
1006
+ case AppleHWId::kCpuFamily_ARM_13:
1007
+ break;
1008
+
1009
+ // ARM Cortex A9.
1010
+ case AppleHWId::kCpuFamily_ARM_14:
1011
+ break;
1012
+
1013
+ // ARM Cortex A7 - ARMv7k.
1014
+ case AppleHWId::kCpuFamily_ARM_15:
1015
+ features.add(CpuFeatures::ARM::kARMv7);
1016
+ break;
1017
+
1018
+ // Apple A6/A6X - ARMv7s.
1019
+ case AppleHWId::kCpuFamily_SWIFT:
1020
+ features.add(CpuFeatures::ARM::kARMv7);
1021
+ break;
1022
+
1023
+ // Apple A7 - ARMv8.0-A.
1024
+ case AppleHWId::kCpuFamily_CYCLONE:
1025
+ features.add(CpuFeatures::ARM::kARMv8a,
1026
+ CpuFeatures::ARM::kAES,
1027
+ CpuFeatures::ARM::kSHA1,
1028
+ CpuFeatures::ARM::kSHA2);
1029
+ break;
1030
+
1031
+ // Apple A8 - ARMv8.0-A.
1032
+ case AppleHWId::kCpuFamily_TYPHOON:
1033
+ features.add(CpuFeatures::ARM::kARMv8a,
1034
+ CpuFeatures::ARM::kAES,
1035
+ CpuFeatures::ARM::kSHA1,
1036
+ CpuFeatures::ARM::kSHA2);
1037
+ break;
1038
+
1039
+ // Apple A9 - ARMv8.0-A.
1040
+ case AppleHWId::kCpuFamily_TWISTER:
1041
+ features.add(CpuFeatures::ARM::kARMv8a,
1042
+ CpuFeatures::ARM::kAES,
1043
+ CpuFeatures::ARM::kSHA1,
1044
+ CpuFeatures::ARM::kSHA2);
1045
+ break;
1046
+
1047
+ // Apple A10 - ARMv8.1-A.
1048
+ case AppleHWId::kCpuFamily_HURRICANE:
1049
+ features.add(CpuFeatures::ARM::kARMv8_1a,
1050
+ CpuFeatures::ARM::kAES,
1051
+ CpuFeatures::ARM::kRDM,
1052
+ CpuFeatures::ARM::kSHA1,
1053
+ CpuFeatures::ARM::kSHA2);
1054
+
1055
+ break;
1056
+
1057
+ // Apple A11 - ARMv8.2-A.
1058
+ case AppleHWId::kCpuFamily_MONSOON_MISTRAL:
1059
+ features.add(CpuFeatures::ARM::kARMv8_2a,
1060
+ CpuFeatures::ARM::kAES,
1061
+ CpuFeatures::ARM::kFP16FULL,
1062
+ CpuFeatures::ARM::kSHA1,
1063
+ CpuFeatures::ARM::kSHA2);
1064
+ break;
1065
+
1066
+ // Apple A12 - ARMv8.3-A.
1067
+ case AppleHWId::kCpuFamily_VORTEX_TEMPEST:
1068
+ features.add(CpuFeatures::ARM::kARMv8_3a,
1069
+ CpuFeatures::ARM::kAES,
1070
+ CpuFeatures::ARM::kFP16FULL,
1071
+ CpuFeatures::ARM::kSHA1,
1072
+ CpuFeatures::ARM::kSHA2);
1073
+ break;
1074
+
1075
+ // Apple A13 - ARMv8.4-A.
1076
+ case AppleHWId::kCpuFamily_LIGHTNING_THUNDER:
1077
+ features.add(CpuFeatures::ARM::kARMv8_4a,
1078
+ CpuFeatures::ARM::kAES,
1079
+ CpuFeatures::ARM::kFP16FML,
1080
+ CpuFeatures::ARM::kFP16FULL,
1081
+ CpuFeatures::ARM::kSHA1,
1082
+ CpuFeatures::ARM::kSHA2,
1083
+ CpuFeatures::ARM::kSHA3,
1084
+ CpuFeatures::ARM::kSHA512);
1085
+ break;
1086
+
1087
+ // Apple A14/M1 - ARMv8.5-A.
1088
+ case AppleHWId::kCpuFamily_FIRESTORM_ICESTORM:
1089
+ features.add(CpuFeatures::ARM::kARMv8_4a,
1090
+ CpuFeatures::ARM::kAES,
1091
+ CpuFeatures::ARM::kALTNZCV,
1092
+ CpuFeatures::ARM::kFP16FML,
1093
+ CpuFeatures::ARM::kFP16FULL,
1094
+ CpuFeatures::ARM::kFRINT,
1095
+ CpuFeatures::ARM::kSB,
1096
+ CpuFeatures::ARM::kSHA1,
1097
+ CpuFeatures::ARM::kSHA2,
1098
+ CpuFeatures::ARM::kSHA3,
1099
+ CpuFeatures::ARM::kSHA512,
1100
+ CpuFeatures::ARM::kSSBS);
1101
+ break;
1102
+
1103
+ default:
1104
+ cpu._wasDetected = false;
1105
+ break;
1106
+ }
1107
+
1108
+ expandARMFeaturesByVersion(cpu);
1109
+ }
1110
+
1111
+ // CpuInfo - Detect - ARM [Unknown]
1112
+ // ================================
1113
+
1114
+ #else
1115
+
1116
+ #if ASMJIT_ARCH_ARM == 64
1117
+ #pragma message("[asmjit] Disabling runtime CPU detection - unsupported OS/CPU combination (Unknown OS with AArch64 CPU)")
1118
+ #else
1119
+ #pragma message("[asmjit] Disabling runtime CPU detection - unsupported OS/CPU combination (Unknown OS with ARM CPU)")
1120
+ #endif
1121
+
1122
+ static ASMJIT_FAVOR_SIZE void detectARMCpu(CpuInfo& cpu) noexcept {
1123
+ populateBaseARMFeatures(cpu);
1124
+ detectARMFeaturesViaCompilerFlags(cpu);
1125
+ expandARMFeaturesByVersion(cpu);
1126
+ }
1127
+ #endif
1128
+
1129
+ #endif
1130
+
1131
+ // CpuInfo - Detect - Host
1132
+ // =======================
1133
+
1134
+ static uint32_t cpuInfoInitialized;
1135
+ static CpuInfo cpuInfoGlobal(Globals::NoInit);
1136
+
1137
+ const CpuInfo& CpuInfo::host() noexcept {
1138
+ // This should never cause a problem as the resulting information should always be the same. In the worst case we
1139
+ // would just overwrite it non-atomically.
1140
+ if (!cpuInfoInitialized) {
1141
+ CpuInfo cpuInfoLocal;
1142
+
1143
+ cpuInfoLocal._arch = Arch::kHost;
1144
+ cpuInfoLocal._subArch = SubArch::kHost;
1145
+
1146
+ #if ASMJIT_ARCH_X86
1147
+ detectX86Cpu(cpuInfoLocal);
1148
+ #elif ASMJIT_ARCH_ARM
1149
+ detectARMCpu(cpuInfoLocal);
1150
+ #else
1151
+ #pragma message("[asmjit] Disabling runtime CPU detection - unsupported OS/CPU combination (Unknown CPU)")
1152
+ #endif
1153
+
1154
+ cpuInfoLocal._hwThreadCount = detectHWThreadCount();
1155
+ cpuInfoGlobal = cpuInfoLocal;
1156
+ cpuInfoInitialized = 1;
1157
+ }
1158
+
1159
+ return cpuInfoGlobal;
1160
+ }
1161
+
1162
+ ASMJIT_END_NAMESPACE