asmjit 0.2.0 → 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (201) hide show
  1. checksums.yaml +4 -4
  2. data/Gemfile.lock +1 -1
  3. data/asmjit.gemspec +1 -1
  4. data/ext/asmjit/asmjit/.editorconfig +10 -0
  5. data/ext/asmjit/asmjit/.github/FUNDING.yml +1 -0
  6. data/ext/asmjit/asmjit/.github/workflows/build-config.json +47 -0
  7. data/ext/asmjit/asmjit/.github/workflows/build.yml +156 -0
  8. data/ext/asmjit/asmjit/.gitignore +6 -0
  9. data/ext/asmjit/asmjit/CMakeLists.txt +611 -0
  10. data/ext/asmjit/asmjit/LICENSE.md +17 -0
  11. data/ext/asmjit/asmjit/README.md +69 -0
  12. data/ext/asmjit/asmjit/src/asmjit/a64.h +62 -0
  13. data/ext/asmjit/asmjit/src/asmjit/arm/a64archtraits_p.h +81 -0
  14. data/ext/asmjit/asmjit/src/asmjit/arm/a64assembler.cpp +5115 -0
  15. data/ext/asmjit/asmjit/src/asmjit/arm/a64assembler.h +72 -0
  16. data/ext/asmjit/asmjit/src/asmjit/arm/a64builder.cpp +51 -0
  17. data/ext/asmjit/asmjit/src/asmjit/arm/a64builder.h +57 -0
  18. data/ext/asmjit/asmjit/src/asmjit/arm/a64compiler.cpp +60 -0
  19. data/ext/asmjit/asmjit/src/asmjit/arm/a64compiler.h +247 -0
  20. data/ext/asmjit/asmjit/src/asmjit/arm/a64emithelper.cpp +464 -0
  21. data/ext/asmjit/asmjit/src/asmjit/arm/a64emithelper_p.h +50 -0
  22. data/ext/asmjit/asmjit/src/asmjit/arm/a64emitter.h +1228 -0
  23. data/ext/asmjit/asmjit/src/asmjit/arm/a64formatter.cpp +298 -0
  24. data/ext/asmjit/asmjit/src/asmjit/arm/a64formatter_p.h +59 -0
  25. data/ext/asmjit/asmjit/src/asmjit/arm/a64func.cpp +189 -0
  26. data/ext/asmjit/asmjit/src/asmjit/arm/a64func_p.h +33 -0
  27. data/ext/asmjit/asmjit/src/asmjit/arm/a64globals.h +1894 -0
  28. data/ext/asmjit/asmjit/src/asmjit/arm/a64instapi.cpp +278 -0
  29. data/ext/asmjit/asmjit/src/asmjit/arm/a64instapi_p.h +41 -0
  30. data/ext/asmjit/asmjit/src/asmjit/arm/a64instdb.cpp +1957 -0
  31. data/ext/asmjit/asmjit/src/asmjit/arm/a64instdb.h +74 -0
  32. data/ext/asmjit/asmjit/src/asmjit/arm/a64instdb_p.h +876 -0
  33. data/ext/asmjit/asmjit/src/asmjit/arm/a64operand.cpp +85 -0
  34. data/ext/asmjit/asmjit/src/asmjit/arm/a64operand.h +312 -0
  35. data/ext/asmjit/asmjit/src/asmjit/arm/a64rapass.cpp +852 -0
  36. data/ext/asmjit/asmjit/src/asmjit/arm/a64rapass_p.h +105 -0
  37. data/ext/asmjit/asmjit/src/asmjit/arm/a64utils.h +179 -0
  38. data/ext/asmjit/asmjit/src/asmjit/arm/armformatter.cpp +143 -0
  39. data/ext/asmjit/asmjit/src/asmjit/arm/armformatter_p.h +44 -0
  40. data/ext/asmjit/asmjit/src/asmjit/arm/armglobals.h +21 -0
  41. data/ext/asmjit/asmjit/src/asmjit/arm/armoperand.h +621 -0
  42. data/ext/asmjit/asmjit/src/asmjit/arm.h +62 -0
  43. data/ext/asmjit/asmjit/src/asmjit/asmjit-scope-begin.h +17 -0
  44. data/ext/asmjit/asmjit/src/asmjit/asmjit-scope-end.h +9 -0
  45. data/ext/asmjit/asmjit/src/asmjit/asmjit.h +33 -0
  46. data/ext/asmjit/asmjit/src/asmjit/core/api-build_p.h +55 -0
  47. data/ext/asmjit/asmjit/src/asmjit/core/api-config.h +613 -0
  48. data/ext/asmjit/asmjit/src/asmjit/core/archcommons.h +229 -0
  49. data/ext/asmjit/asmjit/src/asmjit/core/archtraits.cpp +160 -0
  50. data/ext/asmjit/asmjit/src/asmjit/core/archtraits.h +290 -0
  51. data/ext/asmjit/asmjit/src/asmjit/core/assembler.cpp +406 -0
  52. data/ext/asmjit/asmjit/src/asmjit/core/assembler.h +129 -0
  53. data/ext/asmjit/asmjit/src/asmjit/core/builder.cpp +889 -0
  54. data/ext/asmjit/asmjit/src/asmjit/core/builder.h +1391 -0
  55. data/ext/asmjit/asmjit/src/asmjit/core/codebuffer.h +113 -0
  56. data/ext/asmjit/asmjit/src/asmjit/core/codeholder.cpp +1149 -0
  57. data/ext/asmjit/asmjit/src/asmjit/core/codeholder.h +1035 -0
  58. data/ext/asmjit/asmjit/src/asmjit/core/codewriter.cpp +175 -0
  59. data/ext/asmjit/asmjit/src/asmjit/core/codewriter_p.h +179 -0
  60. data/ext/asmjit/asmjit/src/asmjit/core/compiler.cpp +582 -0
  61. data/ext/asmjit/asmjit/src/asmjit/core/compiler.h +737 -0
  62. data/ext/asmjit/asmjit/src/asmjit/core/compilerdefs.h +173 -0
  63. data/ext/asmjit/asmjit/src/asmjit/core/constpool.cpp +363 -0
  64. data/ext/asmjit/asmjit/src/asmjit/core/constpool.h +250 -0
  65. data/ext/asmjit/asmjit/src/asmjit/core/cpuinfo.cpp +1162 -0
  66. data/ext/asmjit/asmjit/src/asmjit/core/cpuinfo.h +813 -0
  67. data/ext/asmjit/asmjit/src/asmjit/core/emithelper.cpp +323 -0
  68. data/ext/asmjit/asmjit/src/asmjit/core/emithelper_p.h +58 -0
  69. data/ext/asmjit/asmjit/src/asmjit/core/emitter.cpp +333 -0
  70. data/ext/asmjit/asmjit/src/asmjit/core/emitter.h +741 -0
  71. data/ext/asmjit/asmjit/src/asmjit/core/emitterutils.cpp +129 -0
  72. data/ext/asmjit/asmjit/src/asmjit/core/emitterutils_p.h +89 -0
  73. data/ext/asmjit/asmjit/src/asmjit/core/environment.cpp +46 -0
  74. data/ext/asmjit/asmjit/src/asmjit/core/environment.h +508 -0
  75. data/ext/asmjit/asmjit/src/asmjit/core/errorhandler.cpp +14 -0
  76. data/ext/asmjit/asmjit/src/asmjit/core/errorhandler.h +228 -0
  77. data/ext/asmjit/asmjit/src/asmjit/core/formatter.cpp +584 -0
  78. data/ext/asmjit/asmjit/src/asmjit/core/formatter.h +247 -0
  79. data/ext/asmjit/asmjit/src/asmjit/core/formatter_p.h +34 -0
  80. data/ext/asmjit/asmjit/src/asmjit/core/func.cpp +286 -0
  81. data/ext/asmjit/asmjit/src/asmjit/core/func.h +1445 -0
  82. data/ext/asmjit/asmjit/src/asmjit/core/funcargscontext.cpp +293 -0
  83. data/ext/asmjit/asmjit/src/asmjit/core/funcargscontext_p.h +199 -0
  84. data/ext/asmjit/asmjit/src/asmjit/core/globals.cpp +133 -0
  85. data/ext/asmjit/asmjit/src/asmjit/core/globals.h +393 -0
  86. data/ext/asmjit/asmjit/src/asmjit/core/inst.cpp +113 -0
  87. data/ext/asmjit/asmjit/src/asmjit/core/inst.h +772 -0
  88. data/ext/asmjit/asmjit/src/asmjit/core/jitallocator.cpp +1242 -0
  89. data/ext/asmjit/asmjit/src/asmjit/core/jitallocator.h +261 -0
  90. data/ext/asmjit/asmjit/src/asmjit/core/jitruntime.cpp +80 -0
  91. data/ext/asmjit/asmjit/src/asmjit/core/jitruntime.h +89 -0
  92. data/ext/asmjit/asmjit/src/asmjit/core/logger.cpp +69 -0
  93. data/ext/asmjit/asmjit/src/asmjit/core/logger.h +198 -0
  94. data/ext/asmjit/asmjit/src/asmjit/core/misc_p.h +33 -0
  95. data/ext/asmjit/asmjit/src/asmjit/core/operand.cpp +132 -0
  96. data/ext/asmjit/asmjit/src/asmjit/core/operand.h +1611 -0
  97. data/ext/asmjit/asmjit/src/asmjit/core/osutils.cpp +84 -0
  98. data/ext/asmjit/asmjit/src/asmjit/core/osutils.h +61 -0
  99. data/ext/asmjit/asmjit/src/asmjit/core/osutils_p.h +68 -0
  100. data/ext/asmjit/asmjit/src/asmjit/core/raassignment_p.h +418 -0
  101. data/ext/asmjit/asmjit/src/asmjit/core/rabuilders_p.h +612 -0
  102. data/ext/asmjit/asmjit/src/asmjit/core/radefs_p.h +1204 -0
  103. data/ext/asmjit/asmjit/src/asmjit/core/ralocal.cpp +1166 -0
  104. data/ext/asmjit/asmjit/src/asmjit/core/ralocal_p.h +254 -0
  105. data/ext/asmjit/asmjit/src/asmjit/core/rapass.cpp +1969 -0
  106. data/ext/asmjit/asmjit/src/asmjit/core/rapass_p.h +1183 -0
  107. data/ext/asmjit/asmjit/src/asmjit/core/rastack.cpp +184 -0
  108. data/ext/asmjit/asmjit/src/asmjit/core/rastack_p.h +171 -0
  109. data/ext/asmjit/asmjit/src/asmjit/core/string.cpp +559 -0
  110. data/ext/asmjit/asmjit/src/asmjit/core/string.h +372 -0
  111. data/ext/asmjit/asmjit/src/asmjit/core/support.cpp +494 -0
  112. data/ext/asmjit/asmjit/src/asmjit/core/support.h +1773 -0
  113. data/ext/asmjit/asmjit/src/asmjit/core/target.cpp +14 -0
  114. data/ext/asmjit/asmjit/src/asmjit/core/target.h +53 -0
  115. data/ext/asmjit/asmjit/src/asmjit/core/type.cpp +74 -0
  116. data/ext/asmjit/asmjit/src/asmjit/core/type.h +419 -0
  117. data/ext/asmjit/asmjit/src/asmjit/core/virtmem.cpp +722 -0
  118. data/ext/asmjit/asmjit/src/asmjit/core/virtmem.h +242 -0
  119. data/ext/asmjit/asmjit/src/asmjit/core/zone.cpp +353 -0
  120. data/ext/asmjit/asmjit/src/asmjit/core/zone.h +615 -0
  121. data/ext/asmjit/asmjit/src/asmjit/core/zonehash.cpp +309 -0
  122. data/ext/asmjit/asmjit/src/asmjit/core/zonehash.h +186 -0
  123. data/ext/asmjit/asmjit/src/asmjit/core/zonelist.cpp +163 -0
  124. data/ext/asmjit/asmjit/src/asmjit/core/zonelist.h +209 -0
  125. data/ext/asmjit/asmjit/src/asmjit/core/zonestack.cpp +176 -0
  126. data/ext/asmjit/asmjit/src/asmjit/core/zonestack.h +239 -0
  127. data/ext/asmjit/asmjit/src/asmjit/core/zonestring.h +120 -0
  128. data/ext/asmjit/asmjit/src/asmjit/core/zonetree.cpp +99 -0
  129. data/ext/asmjit/asmjit/src/asmjit/core/zonetree.h +380 -0
  130. data/ext/asmjit/asmjit/src/asmjit/core/zonevector.cpp +356 -0
  131. data/ext/asmjit/asmjit/src/asmjit/core/zonevector.h +690 -0
  132. data/ext/asmjit/asmjit/src/asmjit/core.h +1861 -0
  133. data/ext/asmjit/asmjit/src/asmjit/x86/x86archtraits_p.h +148 -0
  134. data/ext/asmjit/asmjit/src/asmjit/x86/x86assembler.cpp +5110 -0
  135. data/ext/asmjit/asmjit/src/asmjit/x86/x86assembler.h +685 -0
  136. data/ext/asmjit/asmjit/src/asmjit/x86/x86builder.cpp +52 -0
  137. data/ext/asmjit/asmjit/src/asmjit/x86/x86builder.h +351 -0
  138. data/ext/asmjit/asmjit/src/asmjit/x86/x86compiler.cpp +61 -0
  139. data/ext/asmjit/asmjit/src/asmjit/x86/x86compiler.h +721 -0
  140. data/ext/asmjit/asmjit/src/asmjit/x86/x86emithelper.cpp +619 -0
  141. data/ext/asmjit/asmjit/src/asmjit/x86/x86emithelper_p.h +60 -0
  142. data/ext/asmjit/asmjit/src/asmjit/x86/x86emitter.h +4315 -0
  143. data/ext/asmjit/asmjit/src/asmjit/x86/x86formatter.cpp +944 -0
  144. data/ext/asmjit/asmjit/src/asmjit/x86/x86formatter_p.h +58 -0
  145. data/ext/asmjit/asmjit/src/asmjit/x86/x86func.cpp +503 -0
  146. data/ext/asmjit/asmjit/src/asmjit/x86/x86func_p.h +33 -0
  147. data/ext/asmjit/asmjit/src/asmjit/x86/x86globals.h +2169 -0
  148. data/ext/asmjit/asmjit/src/asmjit/x86/x86instapi.cpp +1732 -0
  149. data/ext/asmjit/asmjit/src/asmjit/x86/x86instapi_p.h +41 -0
  150. data/ext/asmjit/asmjit/src/asmjit/x86/x86instdb.cpp +4427 -0
  151. data/ext/asmjit/asmjit/src/asmjit/x86/x86instdb.h +563 -0
  152. data/ext/asmjit/asmjit/src/asmjit/x86/x86instdb_p.h +311 -0
  153. data/ext/asmjit/asmjit/src/asmjit/x86/x86opcode_p.h +436 -0
  154. data/ext/asmjit/asmjit/src/asmjit/x86/x86operand.cpp +231 -0
  155. data/ext/asmjit/asmjit/src/asmjit/x86/x86operand.h +1085 -0
  156. data/ext/asmjit/asmjit/src/asmjit/x86/x86rapass.cpp +1509 -0
  157. data/ext/asmjit/asmjit/src/asmjit/x86/x86rapass_p.h +94 -0
  158. data/ext/asmjit/asmjit/src/asmjit/x86.h +93 -0
  159. data/ext/asmjit/asmjit/src/asmjit.natvis +245 -0
  160. data/ext/asmjit/asmjit/test/asmjit_test_assembler.cpp +84 -0
  161. data/ext/asmjit/asmjit/test/asmjit_test_assembler.h +85 -0
  162. data/ext/asmjit/asmjit/test/asmjit_test_assembler_a64.cpp +4006 -0
  163. data/ext/asmjit/asmjit/test/asmjit_test_assembler_x64.cpp +17833 -0
  164. data/ext/asmjit/asmjit/test/asmjit_test_assembler_x86.cpp +8300 -0
  165. data/ext/asmjit/asmjit/test/asmjit_test_compiler.cpp +253 -0
  166. data/ext/asmjit/asmjit/test/asmjit_test_compiler.h +73 -0
  167. data/ext/asmjit/asmjit/test/asmjit_test_compiler_a64.cpp +690 -0
  168. data/ext/asmjit/asmjit/test/asmjit_test_compiler_x86.cpp +4317 -0
  169. data/ext/asmjit/asmjit/test/asmjit_test_emitters.cpp +197 -0
  170. data/ext/asmjit/asmjit/test/asmjit_test_instinfo.cpp +181 -0
  171. data/ext/asmjit/asmjit/test/asmjit_test_misc.h +257 -0
  172. data/ext/asmjit/asmjit/test/asmjit_test_perf.cpp +62 -0
  173. data/ext/asmjit/asmjit/test/asmjit_test_perf.h +61 -0
  174. data/ext/asmjit/asmjit/test/asmjit_test_perf_a64.cpp +699 -0
  175. data/ext/asmjit/asmjit/test/asmjit_test_perf_x86.cpp +5032 -0
  176. data/ext/asmjit/asmjit/test/asmjit_test_unit.cpp +172 -0
  177. data/ext/asmjit/asmjit/test/asmjit_test_x86_sections.cpp +172 -0
  178. data/ext/asmjit/asmjit/test/asmjitutils.h +38 -0
  179. data/ext/asmjit/asmjit/test/broken.cpp +312 -0
  180. data/ext/asmjit/asmjit/test/broken.h +148 -0
  181. data/ext/asmjit/asmjit/test/cmdline.h +61 -0
  182. data/ext/asmjit/asmjit/test/performancetimer.h +41 -0
  183. data/ext/asmjit/asmjit/tools/configure-makefiles.sh +13 -0
  184. data/ext/asmjit/asmjit/tools/configure-ninja.sh +13 -0
  185. data/ext/asmjit/asmjit/tools/configure-sanitizers.sh +13 -0
  186. data/ext/asmjit/asmjit/tools/configure-vs2019-x64.bat +2 -0
  187. data/ext/asmjit/asmjit/tools/configure-vs2019-x86.bat +2 -0
  188. data/ext/asmjit/asmjit/tools/configure-vs2022-x64.bat +2 -0
  189. data/ext/asmjit/asmjit/tools/configure-vs2022-x86.bat +2 -0
  190. data/ext/asmjit/asmjit/tools/configure-xcode.sh +8 -0
  191. data/ext/asmjit/asmjit/tools/enumgen.js +417 -0
  192. data/ext/asmjit/asmjit/tools/enumgen.sh +3 -0
  193. data/ext/asmjit/asmjit/tools/tablegen-arm.js +365 -0
  194. data/ext/asmjit/asmjit/tools/tablegen-arm.sh +3 -0
  195. data/ext/asmjit/asmjit/tools/tablegen-x86.js +2638 -0
  196. data/ext/asmjit/asmjit/tools/tablegen-x86.sh +3 -0
  197. data/ext/asmjit/asmjit/tools/tablegen.js +947 -0
  198. data/ext/asmjit/asmjit/tools/tablegen.sh +4 -0
  199. data/ext/asmjit/asmjit.cc +18 -0
  200. data/lib/asmjit/version.rb +1 -1
  201. metadata +197 -2
@@ -0,0 +1,1162 @@
1
+ // This file is part of AsmJit project <https://asmjit.com>
2
+ //
3
+ // See asmjit.h or LICENSE.md for license and copyright information
4
+ // SPDX-License-Identifier: Zlib
5
+
6
+ #include "../core/api-build_p.h"
7
+ #include "../core/cpuinfo.h"
8
+ #include "../core/support.h"
9
+
10
+ #if !defined(_WIN32)
11
+ #include <errno.h>
12
+ #include <sys/utsname.h>
13
+ #include <unistd.h>
14
+ #endif
15
+
16
+ // Required by `getauxval()` on Linux.
17
+ #if defined(__linux__)
18
+ #include <sys/auxv.h>
19
+ #endif
20
+
21
+ // Required to detect CPU and features on Apple platforms.
22
+ #if defined(__APPLE__)
23
+ #include <mach/machine.h>
24
+ #include <sys/types.h>
25
+ #include <sys/sysctl.h>
26
+ #endif
27
+
28
+ // Required by `__cpuidex()` and `_xgetbv()`.
29
+ #if defined(_MSC_VER)
30
+ #include <intrin.h>
31
+ #endif
32
+
33
+ ASMJIT_BEGIN_NAMESPACE
34
+
35
+ // CpuInfo - Detect - HW-Thread Count
36
+ // ==================================
37
+
// Returns the number of hardware threads reported by the operating system.
//
// Windows reports `SYSTEM_INFO::dwNumberOfProcessors`; systems that define `_SC_NPROCESSORS_ONLN` report the
// number of processors currently online. The result is 1 when the platform offers neither API or when the
// query yields a non-positive value.
static inline uint32_t detectHWThreadCount() noexcept {
#if defined(_WIN32)
  SYSTEM_INFO info;
  ::GetSystemInfo(&info);
  return info.dwNumberOfProcessors;
#elif defined(_SC_NPROCESSORS_ONLN)
  const long onlineCount = ::sysconf(_SC_NPROCESSORS_ONLN);
  if (onlineCount > 0)
    return uint32_t(onlineCount);
  return uint32_t(1);
#else
  return 1;
#endif
}
54
+
55
+ // CpuInfo - Detect - X86
56
+ // ======================
57
+
58
+ #if ASMJIT_ARCH_X86
59
+
// Register values produced by a single `cpuid` query. The members must stay four consecutive 32-bit values in
// EAX, EBX, ECX, EDX order, because the MSVC path of `cpuidQuery()` passes this struct to `__cpuidex()` as `int*`.
struct cpuid_t {
  uint32_t eax;
  uint32_t ebx;
  uint32_t ecx;
  uint32_t edx;
};

// Value read by `xgetbv`, split into its low (EAX) and high (EDX) 32-bit halves.
struct xgetbv_t {
  uint32_t eax;
  uint32_t edx;
};
62
+
63
+ // Executes the `cpuid` instruction with EAX=`inEax` (leaf) and ECX=`inEcx` (sub-leaf, 0 by default) and stores the resulting EAX, EBX, ECX, and EDX register values into `out`.
64
+ static inline void cpuidQuery(cpuid_t* out, uint32_t inEax, uint32_t inEcx = 0) noexcept {
65
+ #if defined(_MSC_VER)
66
+ __cpuidex(reinterpret_cast<int*>(out), inEax, inEcx); // `out` is passed as an array of four ints, so this relies on the eax/ebx/ecx/edx member order of `cpuid_t`.
67
+ #elif defined(__GNUC__) && ASMJIT_ARCH_X86 == 32
68
+ __asm__ __volatile__(
69
+ "mov %%ebx, %%edi\n" // Save EBX in EDI before `cpuid` overwrites it (presumably because EBX may be reserved by the compiler, e.g. for PIC - confirm).
70
+ "cpuid\n"
71
+ "xchg %%edi, %%ebx\n" : "=a"(out->eax), "=D"(out->ebx), "=c"(out->ecx), "=d"(out->edx) : "a"(inEax), "c"(inEcx)); // Restores EBX; the EBX result of `cpuid` ends up in EDI and is returned through the "=D" output.
72
+ #elif defined(__GNUC__) && ASMJIT_ARCH_X86 == 64
73
+ __asm__ __volatile__(
74
+ "mov %%rbx, %%rdi\n" // Same save/restore sequence as the 32-bit path, using the 64-bit registers.
75
+ "cpuid\n"
76
+ "xchg %%rdi, %%rbx\n" : "=a"(out->eax), "=D"(out->ebx), "=c"(out->ecx), "=d"(out->edx) : "a"(inEax), "c"(inEcx)); // Restores RBX; the EBX result of `cpuid` is returned through the "=D" output.
77
+ #else
78
+ #error "[asmjit] x86::cpuidQuery() - Unsupported compiler."
79
+ #endif
80
+ }
81
+
82
+ // Executes the `xgetbv` instruction with ECX=`inEcx` and stores the low and high 32 bits of the result into `out->eax` and `out->edx`. Compilers that are neither MSVC nor GNU-compatible get a zeroed result instead.
83
+ static inline void xgetbvQuery(xgetbv_t* out, uint32_t inEcx) noexcept {
84
+ #if defined(_MSC_VER)
85
+ uint64_t value = _xgetbv(inEcx); // The intrinsic returns the whole 64-bit value, which is split into halves below.
86
+ out->eax = uint32_t(value & 0xFFFFFFFFu);
87
+ out->edx = uint32_t(value >> 32);
88
+ #elif defined(__GNUC__)
89
+ uint32_t outEax;
90
+ uint32_t outEdx;
91
+
92
+ // The instruction is emitted as raw opcode bytes (0F 01 D0) instead of the mnemonic shown below, presumably because some assemblers do not recognize `xgetbv`:
93
+ // __asm__ __volatile__("xgetbv" : "=a"(outEax), "=d"(outEdx) : "c"(inEcx));
94
+ __asm__ __volatile__(".byte 0x0F, 0x01, 0xD0" : "=a"(outEax), "=d"(outEdx) : "c"(inEcx));
95
+
96
+ out->eax = outEax;
97
+ out->edx = outEdx;
98
+ #else
99
+ out->eax = 0; // Unknown compiler: report zero, so callers that test bits of the result see none of them set.
100
+ out->edx = 0;
101
+ #endif
102
+ }
103
+
104
+ // Map a 12-byte vendor string returned by `cpuid` into a `CpuInfo::Vendor` ID.
105
+ static inline void simplifyCpuVendor(CpuInfo& cpu, uint32_t d0, uint32_t d1, uint32_t d2) noexcept {
106
+ struct Vendor {
107
+ char normalized[8];
108
+ union { char text[12]; uint32_t d[3]; };
109
+ };
110
+
111
+ static const Vendor table[] = {
112
+ { { 'A', 'M', 'D' }, {{ 'A', 'u', 't', 'h', 'e', 'n', 't', 'i', 'c', 'A', 'M', 'D' }} },
113
+ { { 'I', 'N', 'T', 'E', 'L' }, {{ 'G', 'e', 'n', 'u', 'i', 'n', 'e', 'I', 'n', 't', 'e', 'l' }} },
114
+ { { 'V', 'I', 'A' }, {{ 'C', 'e', 'n', 't', 'a', 'u', 'r', 'H', 'a', 'u', 'l', 's' }} },
115
+ { { 'V', 'I', 'A' }, {{ 'V', 'I', 'A', 0 , 'V', 'I', 'A', 0 , 'V', 'I', 'A', 0 }} },
116
+ { { 'U', 'N', 'K', 'N', 'O', 'W', 'N' }, {{ 0 }} }
117
+ };
118
+
119
+ uint32_t i;
120
+ for (i = 0; i < ASMJIT_ARRAY_SIZE(table) - 1; i++)
121
+ if (table[i].d[0] == d0 && table[i].d[1] == d1 && table[i].d[2] == d2)
122
+ break;
123
+ memcpy(cpu._vendor.str, table[i].normalized, 8);
124
+ }
125
+
// Compacts a null-terminated CPU brand string in place.
//
// A space is dropped when it directly follows a kept '@' or when the next character is another space or '@'.
// All other characters are kept. Every byte that has been read is zeroed before the loop advances, so nothing
// of the original text remains behind the new null terminator.
static ASMJIT_FAVOR_SIZE void simplifyCpuBrand(char* s) noexcept {
  char* out = s;
  char last = 0;

  // The current character is always read before its slot is cleared.
  char cur = *s;
  *s = '\0';

  while (cur) {
    const bool isSpace = (cur == ' ');
    const bool dropped = isSpace && (last == '@' || s[1] == ' ' || s[1] == '@');

    if (!dropped) {
      *out++ = cur;
      last = cur;
    }

    ++s;
    cur = *s;
    *s = '\0';
  }

  *out = '\0';
}
151
+
152
+ static ASMJIT_FAVOR_SIZE void detectX86Cpu(CpuInfo& cpu) noexcept {
153
+ using Support::bitTest;
154
+
155
+ cpuid_t regs;
156
+ xgetbv_t xcr0 { 0, 0 };
157
+ CpuFeatures::X86& features = cpu.features().x86();
158
+
159
+ cpu._wasDetected = true;
160
+ cpu._maxLogicalProcessors = 1;
161
+
162
+ // We are gonna execute CPUID, which was introduced by I486, so it's the requirement.
163
+ features.add(CpuFeatures::X86::kI486);
164
+
165
+ // CPUID EAX=0
166
+ // -----------
167
+
168
+ // Get vendor string/id.
169
+ cpuidQuery(&regs, 0x0);
170
+
171
+ uint32_t maxId = regs.eax;
172
+ uint32_t maxSubLeafId_0x7 = 0;
173
+
174
+ simplifyCpuVendor(cpu, regs.ebx, regs.edx, regs.ecx);
175
+
176
+ // CPUID EAX=1
177
+ // -----------
178
+
179
+ if (maxId >= 0x1) {
180
+ // Get feature flags in ECX/EDX and family/model in EAX.
181
+ cpuidQuery(&regs, 0x1);
182
+
183
+ // Fill family and model fields.
184
+ uint32_t modelId = (regs.eax >> 4) & 0x0F;
185
+ uint32_t familyId = (regs.eax >> 8) & 0x0F;
186
+
187
+ // Use extended family and model fields.
188
+ if (familyId == 0x06u || familyId == 0x0Fu)
189
+ modelId += (((regs.eax >> 16) & 0x0Fu) << 4);
190
+
191
+ if (familyId == 0x0Fu)
192
+ familyId += ((regs.eax >> 20) & 0xFFu);
193
+
194
+ cpu._modelId = modelId;
195
+ cpu._familyId = familyId;
196
+ cpu._brandId = ((regs.ebx ) & 0xFF);
197
+ cpu._processorType = ((regs.eax >> 12) & 0x03);
198
+ cpu._maxLogicalProcessors = ((regs.ebx >> 16) & 0xFF);
199
+ cpu._stepping = ((regs.eax ) & 0x0F);
200
+ cpu._cacheLineSize = ((regs.ebx >> 8) & 0xFF) * 8;
201
+
202
+ features.addIf(bitTest(regs.ecx, 0), CpuFeatures::X86::kSSE3);
203
+ features.addIf(bitTest(regs.ecx, 1), CpuFeatures::X86::kPCLMULQDQ);
204
+ features.addIf(bitTest(regs.ecx, 3), CpuFeatures::X86::kMONITOR);
205
+ features.addIf(bitTest(regs.ecx, 5), CpuFeatures::X86::kVMX);
206
+ features.addIf(bitTest(regs.ecx, 6), CpuFeatures::X86::kSMX);
207
+ features.addIf(bitTest(regs.ecx, 9), CpuFeatures::X86::kSSSE3);
208
+ features.addIf(bitTest(regs.ecx, 13), CpuFeatures::X86::kCMPXCHG16B);
209
+ features.addIf(bitTest(regs.ecx, 19), CpuFeatures::X86::kSSE4_1);
210
+ features.addIf(bitTest(regs.ecx, 20), CpuFeatures::X86::kSSE4_2);
211
+ features.addIf(bitTest(regs.ecx, 22), CpuFeatures::X86::kMOVBE);
212
+ features.addIf(bitTest(regs.ecx, 23), CpuFeatures::X86::kPOPCNT);
213
+ features.addIf(bitTest(regs.ecx, 25), CpuFeatures::X86::kAESNI);
214
+ features.addIf(bitTest(regs.ecx, 26), CpuFeatures::X86::kXSAVE);
215
+ features.addIf(bitTest(regs.ecx, 27), CpuFeatures::X86::kOSXSAVE);
216
+ features.addIf(bitTest(regs.ecx, 30), CpuFeatures::X86::kRDRAND);
217
+ features.addIf(bitTest(regs.edx, 0), CpuFeatures::X86::kFPU);
218
+ features.addIf(bitTest(regs.edx, 4), CpuFeatures::X86::kRDTSC);
219
+ features.addIf(bitTest(regs.edx, 5), CpuFeatures::X86::kMSR);
220
+ features.addIf(bitTest(regs.edx, 8), CpuFeatures::X86::kCMPXCHG8B);
221
+ features.addIf(bitTest(regs.edx, 15), CpuFeatures::X86::kCMOV);
222
+ features.addIf(bitTest(regs.edx, 19), CpuFeatures::X86::kCLFLUSH);
223
+ features.addIf(bitTest(regs.edx, 23), CpuFeatures::X86::kMMX);
224
+ features.addIf(bitTest(regs.edx, 24), CpuFeatures::X86::kFXSR);
225
+ features.addIf(bitTest(regs.edx, 25), CpuFeatures::X86::kSSE);
226
+ features.addIf(bitTest(regs.edx, 25), CpuFeatures::X86::kSSE, CpuFeatures::X86::kSSE2);
227
+ features.addIf(bitTest(regs.edx, 28), CpuFeatures::X86::kMT);
228
+
229
+ // Get the content of XCR0 if supported by the CPU and enabled by the OS.
230
+ if (features.hasXSAVE() && features.hasOSXSAVE()) {
231
+ xgetbvQuery(&xcr0, 0);
232
+ }
233
+
234
+ // Detect AVX+.
235
+ if (bitTest(regs.ecx, 28)) {
236
+ // - XCR0[2:1] == 11b
237
+ // XMM & YMM states need to be enabled by OS.
238
+ if ((xcr0.eax & 0x00000006u) == 0x00000006u) {
239
+ features.add(CpuFeatures::X86::kAVX);
240
+ features.addIf(bitTest(regs.ecx, 12), CpuFeatures::X86::kFMA);
241
+ features.addIf(bitTest(regs.ecx, 29), CpuFeatures::X86::kF16C);
242
+ }
243
+ }
244
+ }
245
+
246
+ constexpr uint32_t kXCR0_AMX_Bits = 0x3u << 17;
247
+ bool amxEnabledByOS = (xcr0.eax & kXCR0_AMX_Bits) == kXCR0_AMX_Bits;
248
+
249
+ #if defined(__APPLE__)
250
+ // Apple platform provides on-demand AVX512 support. When an AVX512 instruction is used the first time it results
251
+ // in #UD, which would cause the thread being promoted to use AVX512 support by the OS in addition to enabling the
252
+ // necessary bits in XCR0 register.
253
+ bool avx512EnabledByOS = true;
254
+ #else
255
+ // - XCR0[2:1] == 11b - XMM/YMM states need to be enabled by OS.
256
+ // - XCR0[7:5] == 111b - Upper 256-bit of ZMM0-XMM15 and ZMM16-ZMM31 need to be enabled by OS.
257
+ constexpr uint32_t kXCR0_AVX512_Bits = (0x3u << 1) | (0x7u << 5);
258
+ bool avx512EnabledByOS = (xcr0.eax & kXCR0_AVX512_Bits) == kXCR0_AVX512_Bits;
259
+ #endif
260
+
261
+ // CPUID EAX=7 ECX=0
262
+ // -----------------
263
+
264
+ // Detect new features if the processor supports CPUID-07.
265
+ bool maybeMPX = false;
266
+
267
+ if (maxId >= 0x7) {
268
+ cpuidQuery(&regs, 0x7);
269
+
270
+ maybeMPX = bitTest(regs.ebx, 14);
271
+ maxSubLeafId_0x7 = regs.eax;
272
+
273
+ features.addIf(bitTest(regs.ebx, 0), CpuFeatures::X86::kFSGSBASE);
274
+ features.addIf(bitTest(regs.ebx, 3), CpuFeatures::X86::kBMI);
275
+ features.addIf(bitTest(regs.ebx, 4), CpuFeatures::X86::kHLE);
276
+ features.addIf(bitTest(regs.ebx, 7), CpuFeatures::X86::kSMEP);
277
+ features.addIf(bitTest(regs.ebx, 8), CpuFeatures::X86::kBMI2);
278
+ features.addIf(bitTest(regs.ebx, 9), CpuFeatures::X86::kERMS);
279
+ features.addIf(bitTest(regs.ebx, 11), CpuFeatures::X86::kRTM);
280
+ features.addIf(bitTest(regs.ebx, 18), CpuFeatures::X86::kRDSEED);
281
+ features.addIf(bitTest(regs.ebx, 19), CpuFeatures::X86::kADX);
282
+ features.addIf(bitTest(regs.ebx, 20), CpuFeatures::X86::kSMAP);
283
+ features.addIf(bitTest(regs.ebx, 23), CpuFeatures::X86::kCLFLUSHOPT);
284
+ features.addIf(bitTest(regs.ebx, 24), CpuFeatures::X86::kCLWB);
285
+ features.addIf(bitTest(regs.ebx, 29), CpuFeatures::X86::kSHA);
286
+ features.addIf(bitTest(regs.ecx, 0), CpuFeatures::X86::kPREFETCHWT1);
287
+ features.addIf(bitTest(regs.ecx, 4), CpuFeatures::X86::kOSPKE);
288
+ features.addIf(bitTest(regs.ecx, 5), CpuFeatures::X86::kWAITPKG);
289
+ features.addIf(bitTest(regs.ecx, 7), CpuFeatures::X86::kCET_SS);
290
+ features.addIf(bitTest(regs.ecx, 8), CpuFeatures::X86::kGFNI);
291
+ features.addIf(bitTest(regs.ecx, 9), CpuFeatures::X86::kVAES);
292
+ features.addIf(bitTest(regs.ecx, 10), CpuFeatures::X86::kVPCLMULQDQ);
293
+ features.addIf(bitTest(regs.ecx, 22), CpuFeatures::X86::kRDPID);
294
+ features.addIf(bitTest(regs.ecx, 25), CpuFeatures::X86::kCLDEMOTE);
295
+ features.addIf(bitTest(regs.ecx, 27), CpuFeatures::X86::kMOVDIRI);
296
+ features.addIf(bitTest(regs.ecx, 28), CpuFeatures::X86::kMOVDIR64B);
297
+ features.addIf(bitTest(regs.ecx, 29), CpuFeatures::X86::kENQCMD);
298
+ features.addIf(bitTest(regs.edx, 5), CpuFeatures::X86::kUINTR);
299
+ features.addIf(bitTest(regs.edx, 14), CpuFeatures::X86::kSERIALIZE);
300
+ features.addIf(bitTest(regs.edx, 16), CpuFeatures::X86::kTSXLDTRK);
301
+ features.addIf(bitTest(regs.edx, 18), CpuFeatures::X86::kPCONFIG);
302
+ features.addIf(bitTest(regs.edx, 20), CpuFeatures::X86::kCET_IBT);
303
+
304
+ // Detect 'TSX' - Requires at least one of `HLE` and `RTM` features.
305
+ if (features.hasHLE() || features.hasRTM())
306
+ features.add(CpuFeatures::X86::kTSX);
307
+
308
+ // Detect 'AVX2' - Requires AVX as well.
309
+ if (bitTest(regs.ebx, 5) && features.hasAVX())
310
+ features.add(CpuFeatures::X86::kAVX2);
311
+
312
+ // Detect 'AVX512'.
313
+ if (avx512EnabledByOS && bitTest(regs.ebx, 16)) {
314
+ features.add(CpuFeatures::X86::kAVX512_F);
315
+
316
+ features.addIf(bitTest(regs.ebx, 17), CpuFeatures::X86::kAVX512_DQ);
317
+ features.addIf(bitTest(regs.ebx, 21), CpuFeatures::X86::kAVX512_IFMA);
318
+ features.addIf(bitTest(regs.ebx, 26), CpuFeatures::X86::kAVX512_PFI);
319
+ features.addIf(bitTest(regs.ebx, 27), CpuFeatures::X86::kAVX512_ERI);
320
+ features.addIf(bitTest(regs.ebx, 28), CpuFeatures::X86::kAVX512_CDI);
321
+ features.addIf(bitTest(regs.ebx, 30), CpuFeatures::X86::kAVX512_BW);
322
+ features.addIf(bitTest(regs.ebx, 31), CpuFeatures::X86::kAVX512_VL);
323
+ features.addIf(bitTest(regs.ecx, 1), CpuFeatures::X86::kAVX512_VBMI);
324
+ features.addIf(bitTest(regs.ecx, 6), CpuFeatures::X86::kAVX512_VBMI2);
325
+ features.addIf(bitTest(regs.ecx, 11), CpuFeatures::X86::kAVX512_VNNI);
326
+ features.addIf(bitTest(regs.ecx, 12), CpuFeatures::X86::kAVX512_BITALG);
327
+ features.addIf(bitTest(regs.ecx, 14), CpuFeatures::X86::kAVX512_VPOPCNTDQ);
328
+ features.addIf(bitTest(regs.edx, 2), CpuFeatures::X86::kAVX512_4VNNIW);
329
+ features.addIf(bitTest(regs.edx, 3), CpuFeatures::X86::kAVX512_4FMAPS);
330
+ features.addIf(bitTest(regs.edx, 8), CpuFeatures::X86::kAVX512_VP2INTERSECT);
331
+ features.addIf(bitTest(regs.edx, 23), CpuFeatures::X86::kAVX512_FP16);
332
+ }
333
+
334
+ // Detect 'AMX'.
335
+ if (amxEnabledByOS) {
336
+ features.addIf(bitTest(regs.edx, 22), CpuFeatures::X86::kAMX_BF16);
337
+ features.addIf(bitTest(regs.edx, 24), CpuFeatures::X86::kAMX_TILE);
338
+ features.addIf(bitTest(regs.edx, 25), CpuFeatures::X86::kAMX_INT8);
339
+ }
340
+ }
341
+
342
+ // CPUID EAX=7 ECX=1
343
+ // -----------------
344
+
345
+ if (features.hasAVX512_F() && maxSubLeafId_0x7 >= 1) {
346
+ cpuidQuery(&regs, 0x7, 1);
347
+
348
+ features.addIf(bitTest(regs.eax, 3), CpuFeatures::X86::kAVX_VNNI);
349
+ features.addIf(bitTest(regs.eax, 5), CpuFeatures::X86::kAVX512_BF16);
350
+ features.addIf(bitTest(regs.eax, 22), CpuFeatures::X86::kHRESET);
351
+ }
352
+
353
+ // CPUID EAX=13 ECX=0
354
+ // ------------------
355
+
356
+ if (maxId >= 0xD) {
357
+ cpuidQuery(&regs, 0xD, 0);
358
+
359
+ // Both CPUID result and XCR0 has to be enabled to have support for MPX.
360
+ if (((regs.eax & xcr0.eax) & 0x00000018u) == 0x00000018u && maybeMPX)
361
+ features.add(CpuFeatures::X86::kMPX);
362
+
363
+ cpuidQuery(&regs, 0xD, 1);
364
+
365
+ features.addIf(bitTest(regs.eax, 0), CpuFeatures::X86::kXSAVEOPT);
366
+ features.addIf(bitTest(regs.eax, 1), CpuFeatures::X86::kXSAVEC);
367
+ features.addIf(bitTest(regs.eax, 3), CpuFeatures::X86::kXSAVES);
368
+ }
369
+
370
+ // CPUID EAX=14 ECX=0
371
+ // ------------------
372
+
373
+ if (maxId >= 0xE) {
374
+ cpuidQuery(&regs, 0xE, 0);
375
+
376
+ features.addIf(bitTest(regs.ebx, 4), CpuFeatures::X86::kPTWRITE);
377
+ }
378
+
379
+ // CPUID EAX=0x80000000...maxId
380
+ // ----------------------------
381
+
382
+ maxId = 0x80000000u;
383
+ uint32_t i = maxId;
384
+
385
+ // The highest EAX that we understand.
386
+ constexpr uint32_t kHighestProcessedEAX = 0x8000001Fu;
387
+
388
+ // Several CPUID calls are required to get the whole branc string. It's easier
389
+ // to copy one DWORD at a time instead of copying the string a byte by byte.
390
+ uint32_t* brand = cpu._brand.u32;
391
+ do {
392
+ cpuidQuery(&regs, i);
393
+ switch (i) {
394
+ case 0x80000000u:
395
+ maxId = Support::min<uint32_t>(regs.eax, kHighestProcessedEAX);
396
+ break;
397
+
398
+ case 0x80000001u:
399
+ features.addIf(bitTest(regs.ecx, 0), CpuFeatures::X86::kLAHFSAHF);
400
+ features.addIf(bitTest(regs.ecx, 2), CpuFeatures::X86::kSVM);
401
+ features.addIf(bitTest(regs.ecx, 5), CpuFeatures::X86::kLZCNT);
402
+ features.addIf(bitTest(regs.ecx, 6), CpuFeatures::X86::kSSE4A);
403
+ features.addIf(bitTest(regs.ecx, 7), CpuFeatures::X86::kMSSE);
404
+ features.addIf(bitTest(regs.ecx, 8), CpuFeatures::X86::kPREFETCHW);
405
+ features.addIf(bitTest(regs.ecx, 12), CpuFeatures::X86::kSKINIT);
406
+ features.addIf(bitTest(regs.ecx, 15), CpuFeatures::X86::kLWP);
407
+ features.addIf(bitTest(regs.ecx, 21), CpuFeatures::X86::kTBM);
408
+ features.addIf(bitTest(regs.ecx, 29), CpuFeatures::X86::kMONITORX);
409
+ features.addIf(bitTest(regs.edx, 20), CpuFeatures::X86::kNX);
410
+ features.addIf(bitTest(regs.edx, 21), CpuFeatures::X86::kFXSROPT);
411
+ features.addIf(bitTest(regs.edx, 22), CpuFeatures::X86::kMMX2);
412
+ features.addIf(bitTest(regs.edx, 27), CpuFeatures::X86::kRDTSCP);
413
+ features.addIf(bitTest(regs.edx, 29), CpuFeatures::X86::kPREFETCHW);
414
+ features.addIf(bitTest(regs.edx, 30), CpuFeatures::X86::k3DNOW2, CpuFeatures::X86::kMMX2);
415
+ features.addIf(bitTest(regs.edx, 31), CpuFeatures::X86::kPREFETCHW);
416
+
417
+ if (features.hasAVX()) {
418
+ features.addIf(bitTest(regs.ecx, 11), CpuFeatures::X86::kXOP);
419
+ features.addIf(bitTest(regs.ecx, 16), CpuFeatures::X86::kFMA4);
420
+ }
421
+
422
+ // This feature seems to be only supported by AMD.
423
+ if (cpu.isVendor("AMD")) {
424
+ features.addIf(bitTest(regs.ecx, 4), CpuFeatures::X86::kALTMOVCR8);
425
+ }
426
+ break;
427
+
428
+ case 0x80000002u:
429
+ case 0x80000003u:
430
+ case 0x80000004u:
431
+ *brand++ = regs.eax;
432
+ *brand++ = regs.ebx;
433
+ *brand++ = regs.ecx;
434
+ *brand++ = regs.edx;
435
+
436
+ // Go directly to the next one we are interested in.
437
+ if (i == 0x80000004u)
438
+ i = 0x80000008u - 1;
439
+ break;
440
+
441
+ case 0x80000008u:
442
+ features.addIf(bitTest(regs.ebx, 0), CpuFeatures::X86::kCLZERO);
443
+ features.addIf(bitTest(regs.ebx, 0), CpuFeatures::X86::kRDPRU);
444
+ features.addIf(bitTest(regs.ebx, 8), CpuFeatures::X86::kMCOMMIT);
445
+ features.addIf(bitTest(regs.ebx, 9), CpuFeatures::X86::kWBNOINVD);
446
+
447
+ // Go directly to the next one we are interested in.
448
+ i = 0x8000001Fu - 1;
449
+ break;
450
+
451
+ case 0x8000001Fu:
452
+ features.addIf(bitTest(regs.eax, 4), CpuFeatures::X86::kSNP);
453
+ break;
454
+ }
455
+ } while (++i <= maxId);
456
+
457
+ // Simplify CPU brand string a bit by removing some unnecessary spaces.
458
+ simplifyCpuBrand(cpu._brand.str);
459
+ }
460
+
461
+ #endif // ASMJIT_ARCH_X86
462
+
463
+ // CpuInfo - Detect - ARM
464
+ // ======================
465
+
466
+ // The most relevant and accurate information can be found here:
467
+ // https://github.com/llvm/llvm-project/blob/main/llvm/lib/Target/AArch64/AArch64.td
468
+ // https://github.com/apple/llvm-project/blob/apple/main/llvm/lib/Target/AArch64/AArch64.td (Apple fork)
469
+ //
470
+ // Other resources:
471
+ // https://en.wikipedia.org/wiki/AArch64
472
+ // https://en.wikipedia.org/wiki/Apple_silicon#List_of_Apple_processors
473
+ // https://developer.arm.com/architectures/learn-the-architecture/understanding-the-armv8-x-extensions/single-page
474
+
475
+ #if ASMJIT_ARCH_ARM
476
+
477
+ static inline void populateBaseARMFeatures(CpuInfo& cpu) noexcept {
478
+ #if ASMJIT_ARCH_ARM == 32
479
+ // No baseline flags at the moment.
480
+ DebugUtils::unused(cpu);
481
+ #else
482
+ // AArch64 is based on ARMv8-A and later.
483
+ cpu.addFeature(CpuFeatures::ARM::kARMv6);
484
+ cpu.addFeature(CpuFeatures::ARM::kARMv7);
485
+ cpu.addFeature(CpuFeatures::ARM::kARMv8a);
486
+
487
+ // AArch64 comes with these features by default.
488
+ cpu.addFeature(CpuFeatures::ARM::kVFPv2);
489
+ cpu.addFeature(CpuFeatures::ARM::kVFPv3);
490
+ cpu.addFeature(CpuFeatures::ARM::kVFPv4);
491
+ cpu.addFeature(CpuFeatures::ARM::kASIMD);
492
+ cpu.addFeature(CpuFeatures::ARM::kIDIVA);
493
+ #endif
494
+ }
495
+
496
+ // Detects ARM version by macros defined at compile time. This means that AsmJit will report features forced at
497
+ // compile time that should always be provided by the target CPU. This also means that if we don't provide any
498
+ // means to detect CPU features the features reported by AsmJit will at least not report less features than the
499
+ // target it was compiled to.
500
+ ASMJIT_MAYBE_UNUSED
501
+ static ASMJIT_FAVOR_SIZE void detectARMFeaturesViaCompilerFlags(CpuInfo& cpu) noexcept {
502
+ DebugUtils::unused(cpu);
503
+
504
+ #if ASMJIT_ARCH_ARM == 32
505
+
506
+ // ARM targets have no baseline at the moment.
507
+ # if defined(__ARM_ARCH_7A__)
508
+ cpu.addFeature(CpuFeatures::ARM::kARMv7);
509
+ # endif
510
+ # if defined(__ARM_ARCH_8A__)
511
+ cpu.addFeature(CpuFeatures::ARM::kARMv8a);
512
+ # endif
513
+
514
+ # if defined(__TARGET_ARCH_THUMB)
515
+ cpu.addFeature(CpuFeatures::ARM::kTHUMB);
516
+ # if __TARGET_ARCH_THUMB >= 4
517
+ cpu.addFeature(CpuFeatures::ARM::kTHUMBv2);
518
+ # endif
519
+ # endif
520
+
521
+ # if defined(__ARM_FEATURE_FMA)
522
+ cpu.addFeature(CpuFeatures::ARM::kVFPv3);
523
+ cpu.addFeature(CpuFeatures::ARM::kVFPv4);
524
+ # endif
525
+
526
+ # if defined(__ARM_NEON)
527
+ cpu.addFeature(CpuFeatures::ARM::kASIMD);
528
+ # endif
529
+
530
+ # if defined(__ARM_FEATURE_IDIV) && defined(__TARGET_ARCH_THUMB)
531
+ cpu.addFeature(CpuFeatures::ARM::kIDIVT);
532
+ #endif
533
+ # if defined(__ARM_FEATURE_IDIV) && !defined(__TARGET_ARCH_THUMB)
534
+ cpu.addFeature(CpuFeatures::ARM::kIDIVA);
535
+ # endif
536
+
537
+ #endif
538
+
539
+ #if defined(__ARM_ARCH_8_1A__)
540
+ cpu.addFeature(CpuFeatures::ARM::kARMv8_1a);
541
+ #endif
542
+ #if defined(__ARM_ARCH_8_2A__)
543
+ cpu.addFeature(CpuFeatures::ARM::kARMv8_2a);
544
+ #endif
545
+ #if defined(__ARM_ARCH_8_3A__)
546
+ cpu.addFeature(CpuFeatures::ARM::kARMv8_3a);
547
+ #endif
548
+ #if defined(__ARM_ARCH_8_4A__)
549
+ cpu.addFeature(CpuFeatures::ARM::kARMv8_4a);
550
+ #endif
551
+ #if defined(__ARM_ARCH_8_5A__)
552
+ cpu.addFeature(CpuFeatures::ARM::kARMv8_5a);
553
+ #endif
554
+ #if defined(__ARM_ARCH_8_6A__)
555
+ cpu.addFeature(CpuFeatures::ARM::kARMv8_6a);
556
+ #endif
557
+ #if defined(__ARM_ARCH_8_7A__)
558
+ cpu.addFeature(CpuFeatures::ARM::kARMv8_7a);
559
+ #endif
560
+
561
+ #if defined(__ARM_FEATURE_AES)
562
+ cpu.addFeature(CpuFeatures::ARM::kAES);
563
+ #endif
564
+
565
+ #if defined(__ARM_FEATURE_BF16_SCALAR_ARITHMETIC) && defined(__ARM_FEATURE_BF16_VECTOR_ARITHMETIC)
566
+ cpu.addFeature(CpuFeatures::ARM::kBF16);
567
+ #endif
568
+
569
+ #if defined(__ARM_FEATURE_CRC32)
570
+ cpu.addFeature(CpuFeatures::ARM::kCRC32);
571
+ #endif
572
+
573
+ #if defined(__ARM_FEATURE_CRYPTO)
574
+ cpu.addFeature(CpuFeatures::ARM::kAES,
575
+ CpuFeatures::ARM::kSHA1,
576
+ CpuFeatures::ARM::kSHA2);
577
+ #endif
578
+
579
+ #if defined(__ARM_FEATURE_DOTPROD)
580
+ cpu.addFeature(CpuFeatures::ARM::kDOTPROD);
581
+ #endif
582
+
583
+ #if defined(__ARM_FEATURE_FP16FML) || defined(__ARM_FEATURE_FP16_FML)
584
+ cpu.addFeature(CpuFeatures::ARM::kFP16FML);
585
+ #endif
586
+
587
+ #if defined(__ARM_FEATURE_FP16_SCALAR_ARITHMETIC)
588
+ cpu.addFeature(CpuFeatures::ARM::kFP16FULL);
589
+ #endif
590
+
591
+ #if defined(__ARM_FEATURE_FRINT)
592
+ cpu.addFeature(CpuFeatures::ARM::kFRINT);
593
+ #endif
594
+
595
+ #if defined(__ARM_FEATURE_JCVT)
596
+ cpu.addFeature(CpuFeatures::ARM::kFJCVTZS);
597
+ #endif
598
+
599
+ #if defined(__ARM_FEATURE_MATMUL_INT8)
600
+ cpu.addFeature(CpuFeatures::ARM::kI8MM);
601
+ #endif
602
+
603
+ #if defined(__ARM_FEATURE_ATOMICS)
604
+ cpu.addFeature(CpuFeatures::ARM::kLSE);
605
+ #endif
606
+
607
+ #if defined(__ARM_FEATURE_MEMORY_TAGGING)
608
+ cpu.addFeature(CpuFeatures::ARM::kMTE);
609
+ #endif
610
+
611
+ #if defined(__ARM_FEATURE_QRDMX)
612
+ cpu.addFeature(CpuFeatures::ARM::kRDM);
613
+ #endif
614
+
615
+ #if defined(__ARM_FEATURE_RNG)
616
+ cpu.addFeature(CpuFeatures::ARM::kRNG);
617
+ #endif
618
+
619
+ #if defined(__ARM_FEATURE_SHA2)
620
+ cpu.addFeature(CpuFeatures::ARM::kSHA2);
621
+ #endif
622
+
623
+ #if defined(__ARM_FEATURE_SHA3)
624
+ cpu.addFeature(CpuFeatures::ARM::kSHA3);
625
+ #endif
626
+
627
+ #if defined(__ARM_FEATURE_SHA512)
628
+ cpu.addFeature(CpuFeatures::ARM::kSHA512);
629
+ #endif
630
+
631
+ #if defined(__ARM_FEATURE_SM3)
632
+ cpu.addFeature(CpuFeatures::ARM::kSM3);
633
+ #endif
634
+
635
+ #if defined(__ARM_FEATURE_SM4)
636
+ cpu.addFeature(CpuFeatures::ARM::kSM4);
637
+ #endif
638
+
639
+ #if defined(__ARM_FEATURE_SVE) || defined(__ARM_FEATURE_SVE_VECTOR_OPERATORS)
640
+ cpu.addFeature(CpuFeatures::ARM::kSVE);
641
+ #endif
642
+
643
+ #if defined(__ARM_FEATURE_SVE_MATMUL_INT8)
644
+ cpu.addFeature(CpuFeatures::ARM::kSVE_I8MM);
645
+ #endif
646
+
647
+ #if defined(__ARM_FEATURE_SVE_MATMUL_FP32)
648
+ cpu.addFeature(CpuFeatures::ARM::kSVE_F32MM);
649
+ #endif
650
+
651
+ #if defined(__ARM_FEATURE_SVE_MATMUL_FP64)
652
+ cpu.addFeature(CpuFeatures::ARM::kSVE_F64MM);
653
+ #endif
654
+
655
+ #if defined(__ARM_FEATURE_SVE2)
656
+ cpu.addFeature(CpuFeatures::ARM::kSVE2);
657
+ #endif
658
+
659
+ #if defined(__ARM_FEATURE_SVE2_AES)
660
+ cpu.addFeature(CpuFeatures::ARM::kSVE2_AES);
661
+ #endif
662
+
663
+ #if defined(__ARM_FEATURE_SVE2_BITPERM)
664
+ cpu.addFeature(CpuFeatures::ARM::kSVE2_BITPERM);
665
+ #endif
666
+
667
+ #if defined(__ARM_FEATURE_SVE2_SHA3)
668
+ cpu.addFeature(CpuFeatures::ARM::kSVE2_SHA3);
669
+ #endif
670
+
671
+ #if defined(__ARM_FEATURE_SVE2_SM4)
672
+ cpu.addFeature(CpuFeatures::ARM::kSVE2_SM4);
673
+ #endif
674
+
675
+ #if defined(__ARM_FEATURE_TME)
676
+ cpu.addFeature(CpuFeatures::ARM::kTME);
677
+ #endif
678
+ }
679
+
680
+ ASMJIT_MAYBE_UNUSED
681
+ static ASMJIT_FAVOR_SIZE void expandARMFeaturesByVersion(CpuInfo& cpu) noexcept {
682
+ CpuFeatures::ARM& features = cpu.features().arm();
683
+
684
+ if (features.hasARMv8_7a()) {
685
+ features.add(CpuFeatures::ARM::kARMv8_6a);
686
+ }
687
+
688
+ if (features.hasARMv8_6a()) {
689
+ features.add(CpuFeatures::ARM::kARMv8_5a,
690
+ CpuFeatures::ARM::kBF16);
691
+
692
+ if (features.hasSVE())
693
+ features.add(CpuFeatures::ARM::kSVE_I8MM);
694
+ }
695
+
696
+ if (features.hasARMv8_5a()) {
697
+ features.add(CpuFeatures::ARM::kARMv8_4a,
698
+ CpuFeatures::ARM::kALTNZCV,
699
+ CpuFeatures::ARM::kBTI,
700
+ CpuFeatures::ARM::kFRINT,
701
+ CpuFeatures::ARM::kSB,
702
+ CpuFeatures::ARM::kSSBS);
703
+ }
704
+
705
+ if (features.hasARMv8_4a()) {
706
+ features.add(CpuFeatures::ARM::kARMv8_3a,
707
+ CpuFeatures::ARM::kDIT,
708
+ CpuFeatures::ARM::kDOTPROD,
709
+ CpuFeatures::ARM::kFLAGM,
710
+ CpuFeatures::ARM::kPMU,
711
+ CpuFeatures::ARM::kRCPC_IMMO);
712
+ }
713
+
714
+ if (features.hasARMv8_3a()) {
715
+ features.add(CpuFeatures::ARM::kARMv8_2a,
716
+ CpuFeatures::ARM::kFCMA,
717
+ CpuFeatures::ARM::kFJCVTZS);
718
+ }
719
+
720
+ if (features.hasARMv8_2a()) {
721
+ features.add(CpuFeatures::ARM::kARMv8_1a);
722
+ }
723
+
724
+ if (features.hasARMv8_1a()) {
725
+ features.add(CpuFeatures::ARM::kARMv8a,
726
+ CpuFeatures::ARM::kCRC32,
727
+ CpuFeatures::ARM::kLSE,
728
+ CpuFeatures::ARM::kRDM);
729
+ }
730
+
731
+ if (features.hasARMv8a()) {
732
+ features.add(CpuFeatures::ARM::kARMv7,
733
+ CpuFeatures::ARM::kVFPv2,
734
+ CpuFeatures::ARM::kVFPv3,
735
+ CpuFeatures::ARM::kVFPv4,
736
+ CpuFeatures::ARM::kVFP_D32,
737
+ CpuFeatures::ARM::kASIMD,
738
+ CpuFeatures::ARM::kIDIVA);
739
+ }
740
+ }
741
+
742
+ // CpuInfo - Detect - ARM [Windows]
743
+ // ================================
744
+
745
+ #if defined(_WIN32)
746
+ struct WinPFPMapping {
747
+ uint8_t featureId;
748
+ uint8_t pfpFeatureId;
749
+ };
750
+
751
+ static ASMJIT_FAVOR_SIZE void detectPFPFeatures(CpuInfo& cpu, const WinPFPMapping* mapping, size_t size) noexcept {
752
+ for (size_t i = 0; i < size; i++)
753
+ if (::IsProcessorFeaturePresent(mapping[i].pfpFeatureId))
754
+ cpu.addFeature(mapping[i].featureId);
755
+ }
756
+
757
+ //! Detect ARM CPU features on Windows.
758
+ //!
759
+ //! The detection is based on `IsProcessorFeaturePresent()` API call.
760
+ static ASMJIT_FAVOR_SIZE void detectARMCpu(CpuInfo& cpu) noexcept {
761
+ cpu._wasDetected = true;
762
+ populateBaseARMFeatures(cpu);
763
+
764
+ CpuFeatures::ARM& features = cpu.features().arm();
765
+
766
+ // Win32 for ARM requires ARMv7 with DSP extensions, VFPv3, and uses THUMBv2 by default.
767
+ #if ASMJIT_ARCH_ARM == 32
768
+ features.add(CpuFeatures::ARM::kTHUMB);
769
+ features.add(CpuFeatures::ARM::kTHUMBv2);
770
+ features.add(CpuFeatures::ARM::kARMv6);
771
+ features.add(CpuFeatures::ARM::kARMv7);
772
+ features.add(CpuFeatures::ARM::kEDSP);
773
+ features.add(CpuFeatures::ARM::kVFPv2);
774
+ features.add(CpuFeatures::ARM::kVFPv3);
775
+ #endif
776
+
777
+ // Windows for ARM requires ASIMD.
778
+ features.add(CpuFeatures::ARM::kASIMD);
779
+
780
+ // Detect additional CPU features by calling `IsProcessorFeaturePresent()`.
781
+ static const WinPFPMapping mapping[] = {
782
+ #if ASMJIT_ARCH_ARM == 32
783
+ { uint8_t(CpuFeatures::ARM::kVFP_D32) , 18 }, // PF_ARM_VFP_32_REGISTERS_AVAILABLE
784
+ { uint8_t(CpuFeatures::ARM::kIDIVT) , 24 }, // PF_ARM_DIVIDE_INSTRUCTION_AVAILABLE
785
+ { uint8_t(CpuFeatures::ARM::kVFPv4) , 27 }, // PF_ARM_FMAC_INSTRUCTIONS_AVAILABLE
786
+ { uint8_t(CpuFeatures::ARM::kARMv8a) , 29 }, // PF_ARM_V8_INSTRUCTIONS_AVAILABLE
787
+ #endif
788
+ { uint8_t(CpuFeatures::ARM::kAES) , 30 }, // PF_ARM_V8_CRYPTO_INSTRUCTIONS_AVAILABLE
789
+ { uint8_t(CpuFeatures::ARM::kCRC32) , 31 }, // PF_ARM_V8_CRC32_INSTRUCTIONS_AVAILABLE
790
+ { uint8_t(CpuFeatures::ARM::kLSE) , 34 } // PF_ARM_V81_ATOMIC_INSTRUCTIONS_AVAILABLE
791
+
792
+ };
793
+ detectPFPFeatures(cpu, mapping, ASMJIT_ARRAY_SIZE(mapping));
794
+
795
+ // Windows provides several instructions under a single flag:
796
+ if (features.hasAES()) {
797
+ features.add(CpuFeatures::ARM::kSHA1,
798
+ CpuFeatures::ARM::kSHA2);
799
+ }
800
+
801
+ expandARMFeaturesByVersion(cpu);
802
+ }
803
+
804
+ // CpuInfo - Detect - ARM [Linux]
805
+ // ==============================
806
+
807
+ #elif defined(__linux__)
808
+
809
+ struct LinuxHWCapMapping {
810
+ uint8_t featureId;
811
+ uint8_t hwCapBit;
812
+ };
813
+
814
+ static ASMJIT_FAVOR_SIZE void detectHWCaps(CpuInfo& cpu, unsigned long type, const LinuxHWCapMapping* mapping, size_t size) noexcept {
815
+ unsigned long mask = getauxval(type);
816
+ for (size_t i = 0; i < size; i++)
817
+ cpu.features().addIf(Support::bitTest(mask, mapping[i].hwCapBit), mapping[i].featureId);
818
+ }
819
+
820
+ #if ASMJIT_ARCH_ARM == 32
821
+
822
+ // `AT_HWCAP` provides ARMv7 (and less) related flags.
823
+ static const LinuxHWCapMapping hwCapMapping[] = {
824
+ { uint8_t(CpuFeatures::ARM::kVFPv2) , 6 }, // HWCAP_VFP
825
+ { uint8_t(CpuFeatures::ARM::kEDSP) , 7 }, // HWCAP_EDSP
826
+ { uint8_t(CpuFeatures::ARM::kASIMD) , 12 }, // HWCAP_NEON
827
+ { uint8_t(CpuFeatures::ARM::kVFPv3) , 13 }, // HWCAP_VFPv3
828
+ { uint8_t(CpuFeatures::ARM::kVFPv4) , 16 }, // HWCAP_VFPv4
829
+ { uint8_t(CpuFeatures::ARM::kIDIVA) , 17 }, // HWCAP_IDIVA
830
+ { uint8_t(CpuFeatures::ARM::kIDIVT) , 18 }, // HWCAP_IDIVT
831
+ { uint8_t(CpuFeatures::ARM::kVFP_D32) , 19 } // HWCAP_VFPD32
832
+ };
833
+
834
+ // `AT_HWCAP2` provides ARMv8+ related flags.
835
+ static const LinuxHWCapMapping hwCap2Mapping[] = {
836
+ { uint8_t(CpuFeatures::ARM::kAES) , 0 }, // HWCAP2_AES
837
+ { uint8_t(CpuFeatures::ARM::kPMULL) , 1 }, // HWCAP2_PMULL
838
+ { uint8_t(CpuFeatures::ARM::kSHA1) , 2 }, // HWCAP2_SHA1
839
+ { uint8_t(CpuFeatures::ARM::kSHA2) , 3 }, // HWCAP2_SHA2
840
+ { uint8_t(CpuFeatures::ARM::kCRC32) , 4 } // HWCAP2_CRC32
841
+ };
842
+
843
+ static ASMJIT_FAVOR_SIZE void detectARMCpu(CpuInfo& cpu) noexcept {
844
+ cpu._wasDetected = true;
845
+
846
+ populateBaseARMFeatures(cpu);
847
+
848
+ CpuFeatures::ARM& features = cpu.features().arm();
849
+
850
+ detectHWCaps(cpu, AT_HWCAP, hwCapMapping, ASMJIT_ARRAY_SIZE(hwCapMapping));
851
+ detectHWCaps(cpu, AT_HWCAP2, hwCap2Mapping, ASMJIT_ARRAY_SIZE(hwCap2Mapping));
852
+
853
+ // VFPv3 implies VFPv2.
854
+ if (features.hasVFPv3())
855
+ features.add(CpuFeatures::ARM::kVFPv2);
856
+
857
+ // VFPv2 implies ARMv6.
858
+ if (features.hasVFPv2())
859
+ features.add(CpuFeatures::ARM::kARMv6);
860
+
861
+ // ARMv7 provides VFPv3|ASIMD.
862
+ if (features.hasVFPv3() || features.hasASIMD())
863
+ features.add(CpuFeatures::ARM::kARMv7);
864
+
865
+ // ARMv8 provives AES, CRC32, PMULL, SHA1, and SHA2.
866
+ if (features.hasAES() || features.hasCRC32() || features.hasPMULL() || features.hasSHA1() || features.hasSHA2())
867
+ features.add(CpuFeatures::ARM::kARMv8a);
868
+ }
869
+
870
+ #else
871
+
872
+ // `AT_HWCAP` provides ARMv8+ related flags.
873
+ static const LinuxHWCapMapping hwCapMapping[] = {
874
+ /*
875
+ { uint8_t(CpuFeatures::ARM::k) , 0 }, // HWCAP_FP
876
+ */
877
+ { uint8_t(CpuFeatures::ARM::kASIMD) , 1 }, // HWCAP_ASIMD
878
+ /*
879
+ { uint8_t(CpuFeatures::ARM::k) , 2 }, // HWCAP_EVTSTRM
880
+ */
881
+ { uint8_t(CpuFeatures::ARM::kAES) , 3 }, // HWCAP_AES
882
+ { uint8_t(CpuFeatures::ARM::kPMULL) , 4 }, // HWCAP_PMULL
883
+ { uint8_t(CpuFeatures::ARM::kSHA1) , 5 }, // HWCAP_SHA1
884
+ { uint8_t(CpuFeatures::ARM::kSHA2) , 6 }, // HWCAP_SHA2
885
+ { uint8_t(CpuFeatures::ARM::kCRC32) , 7 }, // HWCAP_CRC32
886
+ { uint8_t(CpuFeatures::ARM::kLSE) , 8 }, // HWCAP_ATOMICS
887
+ { uint8_t(CpuFeatures::ARM::kFP16CONV) , 9 }, // HWCAP_FPHP
888
+ { uint8_t(CpuFeatures::ARM::kFP16FULL) , 10 }, // HWCAP_ASIMDHP
889
+ { uint8_t(CpuFeatures::ARM::kCPUID) , 11 }, // HWCAP_CPUID
890
+ { uint8_t(CpuFeatures::ARM::kRDM) , 12 }, // HWCAP_ASIMDRDM
891
+ { uint8_t(CpuFeatures::ARM::kFJCVTZS) , 13 }, // HWCAP_JSCVT
892
+ { uint8_t(CpuFeatures::ARM::kFCMA) , 14 }, // HWCAP_FCMA
893
+ /*
894
+ { uint8_t(CpuFeatures::ARM::k) , 15 }, // HWCAP_LRCPC
895
+ { uint8_t(CpuFeatures::ARM::k) , 16 }, // HWCAP_DCPOP
896
+ */
897
+ { uint8_t(CpuFeatures::ARM::kSHA3) , 17 }, // HWCAP_SHA3
898
+ { uint8_t(CpuFeatures::ARM::kSM3) , 18 }, // HWCAP_SM3
899
+ { uint8_t(CpuFeatures::ARM::kSM4) , 19 }, // HWCAP_SM4
900
+ { uint8_t(CpuFeatures::ARM::kDOTPROD) , 20 }, // HWCAP_ASIMDDP
901
+ { uint8_t(CpuFeatures::ARM::kSHA512) , 21 }, // HWCAP_SHA512
902
+ { uint8_t(CpuFeatures::ARM::kSVE) , 22 }, // HWCAP_SVE
903
+ { uint8_t(CpuFeatures::ARM::kFP16FML) , 23 }, // HWCAP_ASIMDFHM
904
+ { uint8_t(CpuFeatures::ARM::kDIT) , 24 }, // HWCAP_DIT
905
+ /*
906
+ { uint8_t(CpuFeatures::ARM::k) , 25 }, // HWCAP_USCAT
907
+ { uint8_t(CpuFeatures::ARM::k) , 26 }, // HWCAP_ILRCPC
908
+ */
909
+ { uint8_t(CpuFeatures::ARM::kFLAGM) , 27 }, // HWCAP_FLAGM
910
+ { uint8_t(CpuFeatures::ARM::kSSBS) , 28 }, // HWCAP_SSBS
911
+ { uint8_t(CpuFeatures::ARM::kSB) , 29 } // HWCAP_SB
912
+ /*
913
+ { uint8_t(CpuFeatures::ARM::k) , 30 }, // HWCAP_PACA
914
+ { uint8_t(CpuFeatures::ARM::k) , 31 } // HWCAP_PACG
915
+ */
916
+ };
917
+
918
+ // `AT_HWCAP2` provides ARMv8+ related flags.
919
+ static const LinuxHWCapMapping hwCapMapping2[] = {
920
+ /*
921
+ { uint8_t(CpuFeatures::ARM::k) , 0 }, // HWCAP2_DCPODP
922
+ */
923
+ { uint8_t(CpuFeatures::ARM::kSVE2) , 1 }, // HWCAP2_SVE2
924
+ { uint8_t(CpuFeatures::ARM::kSVE2_AES) , 2 }, // HWCAP2_SVEAES
925
+ { uint8_t(CpuFeatures::ARM::kSVE_PMULL) , 3 }, // HWCAP2_SVEPMULL
926
+ { uint8_t(CpuFeatures::ARM::kSVE2_BITPERM), 4 }, // HWCAP2_SVEBITPERM
927
+ { uint8_t(CpuFeatures::ARM::kSVE2_SHA3) , 5 }, // HWCAP2_SVESHA3
928
+ { uint8_t(CpuFeatures::ARM::kSVE2_SM4) , 6 }, // HWCAP2_SVESM4
929
+ { uint8_t(CpuFeatures::ARM::kALTNZCV) , 7 }, // HWCAP2_FLAGM2
930
+ { uint8_t(CpuFeatures::ARM::kFRINT) , 8 }, // HWCAP2_FRINT
931
+ { uint8_t(CpuFeatures::ARM::kSVE_I8MM) , 9 }, // HWCAP2_SVEI8MM
932
+ { uint8_t(CpuFeatures::ARM::kSVE_F32MM) , 10 }, // HWCAP2_SVEF32MM
933
+ { uint8_t(CpuFeatures::ARM::kSVE_F64MM) , 11 }, // HWCAP2_SVEF64MM
934
+ { uint8_t(CpuFeatures::ARM::kSVE_BF16) , 12 }, // HWCAP2_SVEBF16
935
+ { uint8_t(CpuFeatures::ARM::kI8MM) , 13 }, // HWCAP2_I8MM
936
+ { uint8_t(CpuFeatures::ARM::kBF16) , 14 }, // HWCAP2_BF16
937
+ { uint8_t(CpuFeatures::ARM::kDGH) , 15 }, // HWCAP2_DGH
938
+ { uint8_t(CpuFeatures::ARM::kRNG) , 16 }, // HWCAP2_RNG
939
+ { uint8_t(CpuFeatures::ARM::kBTI) , 17 }, // HWCAP2_BTI
940
+ { uint8_t(CpuFeatures::ARM::kMTE) , 18 } // HWCAP2_MTE
941
+ };
942
+
943
+ static ASMJIT_FAVOR_SIZE void detectARMCpu(CpuInfo& cpu) noexcept {
944
+ cpu._wasDetected = true;
945
+ populateBaseARMFeatures(cpu);
946
+
947
+ detectHWCaps(cpu, AT_HWCAP, hwCapMapping, ASMJIT_ARRAY_SIZE(hwCapMapping));
948
+ detectHWCaps(cpu, AT_HWCAP2, hwCapMapping2, ASMJIT_ARRAY_SIZE(hwCapMapping2));
949
+ }
950
+
951
+ #endif
952
+
953
+ // CpuInfo - Detect - ARM [Apple]
954
+ // ==============================
955
+
956
+ #elif defined(__APPLE__)
957
+
958
//! Identifiers compared against the value of the `hw.cpufamily` sysctl.
namespace AppleHWId {
  //! CPU family identifiers, split into generic ARM cores and Apple designed cores.
  enum CpuFamily : uint32_t {
    // Generic ARM.
    kCpuFamily_ARM_9              = 0xE73283AEu,
    kCpuFamily_ARM_11             = 0x8FF620D8u,
    kCpuFamily_ARM_12             = 0xBD1B0AE9u,
    kCpuFamily_ARM_13             = 0x0CC90E64u,
    kCpuFamily_ARM_14             = 0x96077EF1u,
    kCpuFamily_ARM_15             = 0xA8511BCAu,

    // Apple design.
    kCpuFamily_SWIFT              = 0x1E2D6381u,
    kCpuFamily_CYCLONE            = 0x37A09642u,
    kCpuFamily_TYPHOON            = 0x2C91A47Eu,
    kCpuFamily_TWISTER            = 0x92FB37C8u,
    kCpuFamily_HURRICANE          = 0x67CEEE93u,
    kCpuFamily_MONSOON_MISTRAL    = 0xE81E7EF6u,
    kCpuFamily_VORTEX_TEMPEST     = 0x07D34B9Fu,
    kCpuFamily_LIGHTNING_THUNDER  = 0x462504D2u,
    kCpuFamily_FIRESTORM_ICESTORM = 0x1B588BB3u
  };
} // {AppleHWId}
980
+
981
+ static ASMJIT_FAVOR_SIZE uint32_t queryARMCpuFamilyId() noexcept {
982
+ uint32_t result = 0;
983
+ size_t size = sizeof(result);
984
+
985
+ int res = sysctlbyname("hw.cpufamily", &result, &size, nullptr, 0);
986
+ if (res != 0)
987
+ return 0;
988
+ else
989
+ return result;
990
+ }
991
+
992
+ static ASMJIT_FAVOR_SIZE void detectARMCpu(CpuInfo& cpu) noexcept {
993
+ cpu._wasDetected = true;
994
+ populateBaseARMFeatures(cpu);
995
+
996
+ uint32_t cpuFamilyId = queryARMCpuFamilyId();
997
+ CpuFeatures::ARM& features = cpu.features().arm();
998
+
999
+ switch (cpuFamilyId) {
1000
+ case AppleHWId::kCpuFamily_ARM_9:
1001
+ case AppleHWId::kCpuFamily_ARM_11:
1002
+ case AppleHWId::kCpuFamily_ARM_12:
1003
+ break;
1004
+
1005
+ // ARM Cortex A8.
1006
+ case AppleHWId::kCpuFamily_ARM_13:
1007
+ break;
1008
+
1009
+ // ARM Cortex A9.
1010
+ case AppleHWId::kCpuFamily_ARM_14:
1011
+ break;
1012
+
1013
+ // ARM Cortex A7 - ARMv7k.
1014
+ case AppleHWId::kCpuFamily_ARM_15:
1015
+ features.add(CpuFeatures::ARM::kARMv7);
1016
+ break;
1017
+
1018
+ // Apple A6/A6X - ARMv7s.
1019
+ case AppleHWId::kCpuFamily_SWIFT:
1020
+ features.add(CpuFeatures::ARM::kARMv7);
1021
+ break;
1022
+
1023
+ // Apple A7 - ARMv8.0-A.
1024
+ case AppleHWId::kCpuFamily_CYCLONE:
1025
+ features.add(CpuFeatures::ARM::kARMv8a,
1026
+ CpuFeatures::ARM::kAES,
1027
+ CpuFeatures::ARM::kSHA1,
1028
+ CpuFeatures::ARM::kSHA2);
1029
+ break;
1030
+
1031
+ // Apple A8 - ARMv8.0-A.
1032
+ case AppleHWId::kCpuFamily_TYPHOON:
1033
+ features.add(CpuFeatures::ARM::kARMv8a,
1034
+ CpuFeatures::ARM::kAES,
1035
+ CpuFeatures::ARM::kSHA1,
1036
+ CpuFeatures::ARM::kSHA2);
1037
+ break;
1038
+
1039
+ // Apple A9 - ARMv8.0-A.
1040
+ case AppleHWId::kCpuFamily_TWISTER:
1041
+ features.add(CpuFeatures::ARM::kARMv8a,
1042
+ CpuFeatures::ARM::kAES,
1043
+ CpuFeatures::ARM::kSHA1,
1044
+ CpuFeatures::ARM::kSHA2);
1045
+ break;
1046
+
1047
+ // Apple A10 - ARMv8.1-A.
1048
+ case AppleHWId::kCpuFamily_HURRICANE:
1049
+ features.add(CpuFeatures::ARM::kARMv8_1a,
1050
+ CpuFeatures::ARM::kAES,
1051
+ CpuFeatures::ARM::kRDM,
1052
+ CpuFeatures::ARM::kSHA1,
1053
+ CpuFeatures::ARM::kSHA2);
1054
+
1055
+ break;
1056
+
1057
+ // Apple A11 - ARMv8.2-A.
1058
+ case AppleHWId::kCpuFamily_MONSOON_MISTRAL:
1059
+ features.add(CpuFeatures::ARM::kARMv8_2a,
1060
+ CpuFeatures::ARM::kAES,
1061
+ CpuFeatures::ARM::kFP16FULL,
1062
+ CpuFeatures::ARM::kSHA1,
1063
+ CpuFeatures::ARM::kSHA2);
1064
+ break;
1065
+
1066
+ // Apple A12 - ARMv8.3-A.
1067
+ case AppleHWId::kCpuFamily_VORTEX_TEMPEST:
1068
+ features.add(CpuFeatures::ARM::kARMv8_3a,
1069
+ CpuFeatures::ARM::kAES,
1070
+ CpuFeatures::ARM::kFP16FULL,
1071
+ CpuFeatures::ARM::kSHA1,
1072
+ CpuFeatures::ARM::kSHA2);
1073
+ break;
1074
+
1075
+ // Apple A13 - ARMv8.4-A.
1076
+ case AppleHWId::kCpuFamily_LIGHTNING_THUNDER:
1077
+ features.add(CpuFeatures::ARM::kARMv8_4a,
1078
+ CpuFeatures::ARM::kAES,
1079
+ CpuFeatures::ARM::kFP16FML,
1080
+ CpuFeatures::ARM::kFP16FULL,
1081
+ CpuFeatures::ARM::kSHA1,
1082
+ CpuFeatures::ARM::kSHA2,
1083
+ CpuFeatures::ARM::kSHA3,
1084
+ CpuFeatures::ARM::kSHA512);
1085
+ break;
1086
+
1087
+ // Apple A14/M1 - ARMv8.5-A.
1088
+ case AppleHWId::kCpuFamily_FIRESTORM_ICESTORM:
1089
+ features.add(CpuFeatures::ARM::kARMv8_4a,
1090
+ CpuFeatures::ARM::kAES,
1091
+ CpuFeatures::ARM::kALTNZCV,
1092
+ CpuFeatures::ARM::kFP16FML,
1093
+ CpuFeatures::ARM::kFP16FULL,
1094
+ CpuFeatures::ARM::kFRINT,
1095
+ CpuFeatures::ARM::kSB,
1096
+ CpuFeatures::ARM::kSHA1,
1097
+ CpuFeatures::ARM::kSHA2,
1098
+ CpuFeatures::ARM::kSHA3,
1099
+ CpuFeatures::ARM::kSHA512,
1100
+ CpuFeatures::ARM::kSSBS);
1101
+ break;
1102
+
1103
+ default:
1104
+ cpu._wasDetected = false;
1105
+ break;
1106
+ }
1107
+
1108
+ expandARMFeaturesByVersion(cpu);
1109
+ }
1110
+
1111
+ // CpuInfo - Detect - ARM [Unknown]
1112
+ // ================================
1113
+
1114
+ #else
1115
+
1116
+ #if ASMJIT_ARCH_ARM == 64
1117
+ #pragma message("[asmjit] Disabling runtime CPU detection - unsupported OS/CPU combination (Unknown OS with AArch64 CPU)")
1118
+ #else
1119
+ #pragma message("[asmjit] Disabling runtime CPU detection - unsupported OS/CPU combination (Unknown OS with ARM CPU)")
1120
+ #endif
1121
+
1122
+ static ASMJIT_FAVOR_SIZE void detectARMCpu(CpuInfo& cpu) noexcept {
1123
+ populateBaseARMFeatures(cpu);
1124
+ detectARMFeaturesViaCompilerFlags(cpu);
1125
+ expandARMFeaturesByVersion(cpu);
1126
+ }
1127
+ #endif
1128
+
1129
+ #endif
1130
+
1131
+ // CpuInfo - Detect - Host
1132
+ // =======================
1133
+
1134
+ static uint32_t cpuInfoInitialized;
1135
+ static CpuInfo cpuInfoGlobal(Globals::NoInit);
1136
+
1137
+ const CpuInfo& CpuInfo::host() noexcept {
1138
+ // This should never cause a problem as the resulting information should always be the same. In the worst case we
1139
+ // would just overwrite it non-atomically.
1140
+ if (!cpuInfoInitialized) {
1141
+ CpuInfo cpuInfoLocal;
1142
+
1143
+ cpuInfoLocal._arch = Arch::kHost;
1144
+ cpuInfoLocal._subArch = SubArch::kHost;
1145
+
1146
+ #if ASMJIT_ARCH_X86
1147
+ detectX86Cpu(cpuInfoLocal);
1148
+ #elif ASMJIT_ARCH_ARM
1149
+ detectARMCpu(cpuInfoLocal);
1150
+ #else
1151
+ #pragma message("[asmjit] Disabling runtime CPU detection - unsupported OS/CPU combination (Unknown CPU)")
1152
+ #endif
1153
+
1154
+ cpuInfoLocal._hwThreadCount = detectHWThreadCount();
1155
+ cpuInfoGlobal = cpuInfoLocal;
1156
+ cpuInfoInitialized = 1;
1157
+ }
1158
+
1159
+ return cpuInfoGlobal;
1160
+ }
1161
+
1162
+ ASMJIT_END_NAMESPACE