numkong 7.4.2 → 7.4.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (174):
  1. package/README.md +100 -100
  2. package/binding.gyp +3 -0
  3. package/c/numkong.c +1 -1
  4. package/include/numkong/attention/sapphireamx.h +2 -2
  5. package/include/numkong/attention/sme.h +2 -2
  6. package/include/numkong/capabilities.h +47 -47
  7. package/include/numkong/cast/diamond.h +2 -2
  8. package/include/numkong/cast/haswell.h +2 -2
  9. package/include/numkong/cast/icelake.h +2 -2
  10. package/include/numkong/cast/loongsonasx.h +2 -2
  11. package/include/numkong/cast/neon.h +2 -2
  12. package/include/numkong/cast/powervsx.h +2 -2
  13. package/include/numkong/cast/rvv.h +2 -2
  14. package/include/numkong/cast/sapphire.h +2 -2
  15. package/include/numkong/cast/skylake.h +2 -2
  16. package/include/numkong/curved/genoa.h +2 -2
  17. package/include/numkong/curved/haswell.h +2 -2
  18. package/include/numkong/curved/neon.h +2 -2
  19. package/include/numkong/curved/neonbfdot.h +2 -2
  20. package/include/numkong/curved/rvv.h +2 -2
  21. package/include/numkong/curved/skylake.h +2 -2
  22. package/include/numkong/curved/smef64.h +2 -2
  23. package/include/numkong/dot/alder.h +2 -2
  24. package/include/numkong/dot/diamond.h +2 -2
  25. package/include/numkong/dot/genoa.h +2 -2
  26. package/include/numkong/dot/haswell.h +2 -2
  27. package/include/numkong/dot/icelake.h +2 -2
  28. package/include/numkong/dot/loongsonasx.h +2 -2
  29. package/include/numkong/dot/neon.h +2 -2
  30. package/include/numkong/dot/neonbfdot.h +2 -2
  31. package/include/numkong/dot/neonfhm.h +2 -2
  32. package/include/numkong/dot/neonfp8.h +2 -2
  33. package/include/numkong/dot/neonsdot.h +2 -2
  34. package/include/numkong/dot/rvv.h +2 -2
  35. package/include/numkong/dot/rvvbb.h +2 -2
  36. package/include/numkong/dot/rvvbf16.h +2 -2
  37. package/include/numkong/dot/rvvhalf.h +2 -2
  38. package/include/numkong/dot/sapphire.h +2 -2
  39. package/include/numkong/dot/sierra.h +2 -2
  40. package/include/numkong/dot/skylake.h +2 -2
  41. package/include/numkong/dot/sve.h +2 -2
  42. package/include/numkong/dot/svebfdot.h +2 -2
  43. package/include/numkong/dot/svehalf.h +2 -2
  44. package/include/numkong/dot/svesdot.h +2 -2
  45. package/include/numkong/dots/alder.h +2 -2
  46. package/include/numkong/dots/diamond.h +2 -2
  47. package/include/numkong/dots/genoa.h +2 -2
  48. package/include/numkong/dots/haswell.h +2 -2
  49. package/include/numkong/dots/icelake.h +2 -2
  50. package/include/numkong/dots/loongsonasx.h +2 -2
  51. package/include/numkong/dots/neon.h +2 -2
  52. package/include/numkong/dots/neonbfdot.h +2 -2
  53. package/include/numkong/dots/neonfhm.h +2 -2
  54. package/include/numkong/dots/neonfp8.h +2 -2
  55. package/include/numkong/dots/neonsdot.h +2 -2
  56. package/include/numkong/dots/powervsx.h +2 -2
  57. package/include/numkong/dots/rvv.h +2 -2
  58. package/include/numkong/dots/sapphireamx.h +2 -2
  59. package/include/numkong/dots/sierra.h +2 -2
  60. package/include/numkong/dots/skylake.h +2 -2
  61. package/include/numkong/dots/sme.h +10 -10
  62. package/include/numkong/dots/smebi32.h +2 -2
  63. package/include/numkong/dots/smef64.h +2 -2
  64. package/include/numkong/dots/smehalf.h +2 -2
  65. package/include/numkong/each/haswell.h +6 -6
  66. package/include/numkong/each/icelake.h +2 -2
  67. package/include/numkong/each/neon.h +2 -2
  68. package/include/numkong/each/neonbfdot.h +2 -2
  69. package/include/numkong/each/neonhalf.h +2 -2
  70. package/include/numkong/each/rvv.h +2 -2
  71. package/include/numkong/each/sapphire.h +2 -2
  72. package/include/numkong/each/skylake.h +2 -2
  73. package/include/numkong/geospatial/haswell.h +2 -2
  74. package/include/numkong/geospatial/neon.h +2 -2
  75. package/include/numkong/geospatial/rvv.h +2 -2
  76. package/include/numkong/geospatial/skylake.h +2 -2
  77. package/include/numkong/maxsim/alder.h +2 -2
  78. package/include/numkong/maxsim/genoa.h +2 -2
  79. package/include/numkong/maxsim/haswell.h +2 -2
  80. package/include/numkong/maxsim/icelake.h +2 -2
  81. package/include/numkong/maxsim/neonsdot.h +2 -2
  82. package/include/numkong/maxsim/sapphireamx.h +2 -2
  83. package/include/numkong/maxsim/sme.h +2 -2
  84. package/include/numkong/mesh/haswell.h +2 -2
  85. package/include/numkong/mesh/neon.h +2 -2
  86. package/include/numkong/mesh/neonbfdot.h +2 -2
  87. package/include/numkong/mesh/rvv.h +2 -2
  88. package/include/numkong/mesh/skylake.h +2 -2
  89. package/include/numkong/numkong.h +1 -1
  90. package/include/numkong/probability/haswell.h +2 -2
  91. package/include/numkong/probability/neon.h +2 -2
  92. package/include/numkong/probability/rvv.h +2 -2
  93. package/include/numkong/probability/skylake.h +2 -2
  94. package/include/numkong/reduce/alder.h +2 -2
  95. package/include/numkong/reduce/genoa.h +2 -2
  96. package/include/numkong/reduce/haswell.h +2 -2
  97. package/include/numkong/reduce/icelake.h +2 -2
  98. package/include/numkong/reduce/neon.h +2 -2
  99. package/include/numkong/reduce/neonbfdot.h +2 -2
  100. package/include/numkong/reduce/neonfhm.h +2 -2
  101. package/include/numkong/reduce/neonsdot.h +2 -2
  102. package/include/numkong/reduce/rvv.h +2 -2
  103. package/include/numkong/reduce/sierra.h +2 -2
  104. package/include/numkong/reduce/skylake.h +2 -2
  105. package/include/numkong/scalar/haswell.h +2 -2
  106. package/include/numkong/scalar/loongsonasx.h +2 -2
  107. package/include/numkong/scalar/neon.h +2 -2
  108. package/include/numkong/scalar/neonhalf.h +2 -2
  109. package/include/numkong/scalar/powervsx.h +2 -2
  110. package/include/numkong/scalar/rvv.h +2 -2
  111. package/include/numkong/scalar/sapphire.h +2 -2
  112. package/include/numkong/set/haswell.h +2 -2
  113. package/include/numkong/set/icelake.h +2 -2
  114. package/include/numkong/set/loongsonasx.h +2 -2
  115. package/include/numkong/set/neon.h +2 -2
  116. package/include/numkong/set/powervsx.h +2 -2
  117. package/include/numkong/set/rvv.h +2 -2
  118. package/include/numkong/set/rvvbb.h +2 -2
  119. package/include/numkong/set/sve.h +2 -2
  120. package/include/numkong/sets/haswell.h +2 -2
  121. package/include/numkong/sets/icelake.h +2 -2
  122. package/include/numkong/sets/loongsonasx.h +2 -2
  123. package/include/numkong/sets/neon.h +2 -2
  124. package/include/numkong/sets/powervsx.h +2 -2
  125. package/include/numkong/sets/smebi32.h +2 -2
  126. package/include/numkong/sparse/icelake.h +2 -2
  127. package/include/numkong/sparse/neon.h +2 -2
  128. package/include/numkong/sparse/sve2.h +2 -2
  129. package/include/numkong/sparse/turin.h +2 -2
  130. package/include/numkong/spatial/alder.h +2 -2
  131. package/include/numkong/spatial/diamond.h +2 -2
  132. package/include/numkong/spatial/genoa.h +2 -2
  133. package/include/numkong/spatial/haswell.h +2 -2
  134. package/include/numkong/spatial/icelake.h +2 -2
  135. package/include/numkong/spatial/loongsonasx.h +2 -2
  136. package/include/numkong/spatial/neon.h +2 -2
  137. package/include/numkong/spatial/neonbfdot.h +2 -2
  138. package/include/numkong/spatial/neonfp8.h +2 -2
  139. package/include/numkong/spatial/neonsdot.h +2 -2
  140. package/include/numkong/spatial/powervsx.h +2 -2
  141. package/include/numkong/spatial/rvv.h +2 -2
  142. package/include/numkong/spatial/rvvbf16.h +2 -2
  143. package/include/numkong/spatial/rvvhalf.h +2 -2
  144. package/include/numkong/spatial/sierra.h +2 -2
  145. package/include/numkong/spatial/skylake.h +2 -2
  146. package/include/numkong/spatial/sve.h +2 -2
  147. package/include/numkong/spatial/svebfdot.h +2 -2
  148. package/include/numkong/spatial/svehalf.h +2 -2
  149. package/include/numkong/spatial/svesdot.h +2 -2
  150. package/include/numkong/spatials/alder.h +2 -2
  151. package/include/numkong/spatials/diamond.h +2 -2
  152. package/include/numkong/spatials/genoa.h +2 -2
  153. package/include/numkong/spatials/haswell.h +2 -2
  154. package/include/numkong/spatials/icelake.h +2 -2
  155. package/include/numkong/spatials/loongsonasx.h +2 -2
  156. package/include/numkong/spatials/neon.h +2 -2
  157. package/include/numkong/spatials/neonbfdot.h +2 -2
  158. package/include/numkong/spatials/neonfhm.h +2 -2
  159. package/include/numkong/spatials/neonfp8.h +2 -2
  160. package/include/numkong/spatials/neonsdot.h +2 -2
  161. package/include/numkong/spatials/powervsx.h +2 -2
  162. package/include/numkong/spatials/rvv.h +2 -2
  163. package/include/numkong/spatials/sapphireamx.h +2 -2
  164. package/include/numkong/spatials/sierra.h +2 -2
  165. package/include/numkong/spatials/skylake.h +2 -2
  166. package/include/numkong/spatials/sme.h +2 -2
  167. package/include/numkong/spatials/smef64.h +2 -2
  168. package/include/numkong/trigonometry/haswell.h +2 -2
  169. package/include/numkong/trigonometry/neon.h +2 -2
  170. package/include/numkong/trigonometry/rvv.h +2 -2
  171. package/include/numkong/trigonometry/skylake.h +2 -2
  172. package/include/numkong/types.h +103 -89
  173. package/numkong.gypi +3 -0
  174. package/package.json +7 -7
@@ -9,7 +9,7 @@
9
9
  #ifndef NK_SPATIALS_SIERRA_H
10
10
  #define NK_SPATIALS_SIERRA_H
11
11
 
12
- #if NK_TARGET_X86_
12
+ #if NK_TARGET_X8664_
13
13
  #if NK_TARGET_SIERRA
14
14
 
15
15
  #include "numkong/spatial/haswell.h"
@@ -92,5 +92,5 @@ nk_define_cross_normalized_symmetric_(euclidean, e2m3, sierra, e2m3, f32, /*norm
92
92
  #endif
93
93
 
94
94
  #endif // NK_TARGET_SIERRA
95
- #endif // NK_TARGET_X86_
95
+ #endif // NK_TARGET_X8664_
96
96
  #endif // NK_SPATIALS_SIERRA_H
@@ -9,7 +9,7 @@
9
9
  #ifndef NK_SPATIALS_SKYLAKE_H
10
10
  #define NK_SPATIALS_SKYLAKE_H
11
11
 
12
- #if NK_TARGET_X86_
12
+ #if NK_TARGET_X8664_
13
13
  #if NK_TARGET_SKYLAKE
14
14
 
15
15
  #include "numkong/spatial/skylake.h"
@@ -180,5 +180,5 @@ nk_define_cross_normalized_symmetric_(euclidean, e3m2, skylake, e3m2, f32, /*nor
180
180
  #endif
181
181
 
182
182
  #endif // NK_TARGET_SKYLAKE
183
- #endif // NK_TARGET_X86_
183
+ #endif // NK_TARGET_X8664_
184
184
  #endif // NK_SPATIALS_SKYLAKE_H
@@ -9,7 +9,7 @@
9
9
  #ifndef NK_SPATIALS_SME_H
10
10
  #define NK_SPATIALS_SME_H
11
11
 
12
- #if NK_TARGET_ARM_
12
+ #if NK_TARGET_ARM64_
13
13
  #if NK_TARGET_SME
14
14
 
15
15
  #include "numkong/dots/serial.h"
@@ -1882,5 +1882,5 @@ NK_PUBLIC void nk_euclideans_symmetric_u4_sme(
1882
1882
  #endif
1883
1883
 
1884
1884
  #endif // NK_TARGET_SME
1885
- #endif // NK_TARGET_ARM_
1885
+ #endif // NK_TARGET_ARM64_
1886
1886
  #endif // NK_SPATIALS_SME_H
@@ -9,7 +9,7 @@
9
9
  #ifndef NK_SPATIALS_SMEF64_H
10
10
  #define NK_SPATIALS_SMEF64_H
11
11
 
12
- #if NK_TARGET_ARM_
12
+ #if NK_TARGET_ARM64_
13
13
  #if NK_TARGET_SME
14
14
 
15
15
  #include "numkong/dots/serial.h"
@@ -470,5 +470,5 @@ NK_PUBLIC void nk_euclideans_symmetric_f64_smef64(
470
470
  #endif
471
471
 
472
472
  #endif // NK_TARGET_SME
473
- #endif // NK_TARGET_ARM_
473
+ #endif // NK_TARGET_ARM64_
474
474
  #endif // NK_SPATIALS_SMEF64_H
@@ -23,7 +23,7 @@
23
23
  #ifndef NK_TRIGONOMETRY_HASWELL_H
24
24
  #define NK_TRIGONOMETRY_HASWELL_H
25
25
 
26
- #if NK_TARGET_X86_
26
+ #if NK_TARGET_X8664_
27
27
  #if NK_TARGET_HASWELL
28
28
 
29
29
  #include "numkong/types.h"
@@ -649,5 +649,5 @@ NK_PUBLIC void nk_each_atan_f64_haswell(nk_f64_t const *ins, nk_size_t n, nk_f64
649
649
  #endif
650
650
 
651
651
  #endif // NK_TARGET_HASWELL
652
- #endif // NK_TARGET_X86_
652
+ #endif // NK_TARGET_X8664_
653
653
  #endif // NK_TRIGONOMETRY_HASWELL_H
@@ -32,7 +32,7 @@
32
32
  #ifndef NK_TRIGONOMETRY_NEON_H
33
33
  #define NK_TRIGONOMETRY_NEON_H
34
34
 
35
- #if NK_TARGET_ARM_
35
+ #if NK_TARGET_ARM64_
36
36
  #if NK_TARGET_NEON
37
37
 
38
38
  #include "numkong/types.h"
@@ -634,5 +634,5 @@ NK_PUBLIC void nk_each_atan_f64_neon(nk_f64_t const *ins, nk_size_t n, nk_f64_t
634
634
  #endif
635
635
 
636
636
  #endif // NK_TARGET_NEON
637
- #endif // NK_TARGET_ARM_
637
+ #endif // NK_TARGET_ARM64_
638
638
  #endif // NK_TRIGONOMETRY_NEON_H
@@ -37,7 +37,7 @@
37
37
  #ifndef NK_TRIGONOMETRY_RVV_H
38
38
  #define NK_TRIGONOMETRY_RVV_H
39
39
 
40
- #if NK_TARGET_RISCV_
40
+ #if NK_TARGET_RISCV64_
41
41
  #if NK_TARGET_RVV
42
42
 
43
43
  #include "numkong/types.h"
@@ -696,5 +696,5 @@ NK_PUBLIC void nk_each_atan_f16_rvv(nk_f16_t const *ins, nk_size_t n, nk_f16_t *
696
696
  #endif
697
697
 
698
698
  #endif // NK_TARGET_RVV
699
- #endif // NK_TARGET_RISCV_
699
+ #endif // NK_TARGET_RISCV64_
700
700
  #endif // NK_TRIGONOMETRY_RVV_H
@@ -23,7 +23,7 @@
23
23
  #ifndef NK_TRIGONOMETRY_SKYLAKE_H
24
24
  #define NK_TRIGONOMETRY_SKYLAKE_H
25
25
 
26
- #if NK_TARGET_X86_
26
+ #if NK_TARGET_X8664_
27
27
  #if NK_TARGET_SKYLAKE
28
28
 
29
29
  #include "numkong/types.h"
@@ -721,5 +721,5 @@ NK_PUBLIC void nk_each_atan_f16_skylake(nk_f16_t const *ins, nk_size_t n, nk_f16
721
721
  #endif
722
722
 
723
723
  #endif // NK_TARGET_SKYLAKE
724
- #endif // NK_TARGET_X86_
724
+ #endif // NK_TARGET_X8664_
725
725
  #endif // NK_TRIGONOMETRY_SKYLAKE_H
@@ -7,7 +7,7 @@
7
7
  * Defines:
8
8
  *
9
9
  * - Sized aliases for numeric types, like: `nk_i32_t` and `nk_f64_t`.
10
- * - Macros for internal compiler/hardware checks, like: `NK_TARGET_ARM_`.
10
+ * - Macros for internal compiler/hardware checks, like: `NK_TARGET_ARM64_`.
11
11
  * - Macros for feature controls, like: `NK_TARGET_NEON`
12
12
  *
13
13
  * @section fp8_types FP8 Numeric Types
@@ -119,6 +119,12 @@
119
119
  #define NK_MAY_ALIAS_
120
120
  #endif
121
121
 
122
+ #if defined(__has_builtin)
123
+ #define nk_has_builtin_(x) __has_builtin(x)
124
+ #else
125
+ #define nk_has_builtin_(x) 0
126
+ #endif
127
+
122
128
  // Allow SIMD kernels to redirect small inputs to serial implementations.
123
129
  // Enabled by default for production use. Tests and benchmarks may disable
124
130
  // this to isolate SIMD path behavior on small inputs.
@@ -126,52 +132,52 @@
126
132
  #define NK_ALLOW_ISA_REDIRECT 1
127
133
  #endif
128
134
 
129
- // Compiling for Arm: NK_TARGET_ARM_
135
+ // Compiling for 64-bit Arm: NK_TARGET_ARM64_
130
136
  // https://arm-software.github.io/acle/main/acle.html
131
- #if !defined(NK_TARGET_ARM_)
137
+ #if !defined(NK_TARGET_ARM64_)
132
138
  #if defined(__aarch64__) || defined(_M_ARM64)
133
- #define NK_TARGET_ARM_ 1
139
+ #define NK_TARGET_ARM64_ 1
134
140
  #else
135
- #define NK_TARGET_ARM_ 0
141
+ #define NK_TARGET_ARM64_ 0
136
142
  #endif // defined(__aarch64__) || defined(_M_ARM64)
137
- #endif // !defined(NK_TARGET_ARM_)
143
+ #endif // !defined(NK_TARGET_ARM64_)
138
144
 
139
- // Compiling for x86: NK_TARGET_X86_
145
+ // Compiling for x86: NK_TARGET_X8664_
140
146
  // https://www.intel.com/content/www/us/en/docs/dpcpp-cpp-compiler/developer-guide-reference/2024-2/additional-predefined-macros.html
141
- #if !defined(NK_TARGET_X86_)
147
+ #if !defined(NK_TARGET_X8664_)
142
148
  #if defined(__x86_64__) || defined(_M_X64)
143
- #define NK_TARGET_X86_ 1
149
+ #define NK_TARGET_X8664_ 1
144
150
  #else
145
- #define NK_TARGET_X86_ 0
151
+ #define NK_TARGET_X8664_ 0
146
152
  #endif // defined(__x86_64__) || defined(_M_X64)
147
- #endif // !defined(NK_TARGET_X86_)
153
+ #endif // !defined(NK_TARGET_X8664_)
148
154
 
149
- // Compiling for RISC-V: NK_TARGET_RISCV_
150
- #if !defined(NK_TARGET_RISCV_)
155
+ // Compiling for RISC-V: NK_TARGET_RISCV64_
156
+ #if !defined(NK_TARGET_RISCV64_)
151
157
  #if defined(__riscv) && (__riscv_xlen == 64)
152
- #define NK_TARGET_RISCV_ 1
158
+ #define NK_TARGET_RISCV64_ 1
153
159
  #else
154
- #define NK_TARGET_RISCV_ 0
160
+ #define NK_TARGET_RISCV64_ 0
155
161
  #endif // defined(__riscv) && (__riscv_xlen == 64)
156
- #endif // !defined(NK_TARGET_RISCV_)
162
+ #endif // !defined(NK_TARGET_RISCV64_)
157
163
 
158
- // Compiling for LoongArch: NK_TARGET_LOONGARCH_
159
- #if !defined(NK_TARGET_LOONGARCH_)
164
+ // Compiling for LoongArch: NK_TARGET_LOONGARCH64_
165
+ #if !defined(NK_TARGET_LOONGARCH64_)
160
166
  #if defined(__loongarch__)
161
- #define NK_TARGET_LOONGARCH_ 1
167
+ #define NK_TARGET_LOONGARCH64_ 1
162
168
  #else
163
- #define NK_TARGET_LOONGARCH_ 0
169
+ #define NK_TARGET_LOONGARCH64_ 0
164
170
  #endif // defined(__loongarch__)
165
- #endif // !defined(NK_TARGET_LOONGARCH_)
171
+ #endif // !defined(NK_TARGET_LOONGARCH64_)
166
172
 
167
- // Compiling for Power: NK_TARGET_POWER_
168
- #if !defined(NK_TARGET_POWER_)
173
+ // Compiling for Power: NK_TARGET_POWER64_
174
+ #if !defined(NK_TARGET_POWER64_)
169
175
  #if defined(__powerpc64__) || defined(__ppc64__) || defined(_ARCH_PPC64)
170
- #define NK_TARGET_POWER_ 1
176
+ #define NK_TARGET_POWER64_ 1
171
177
  #else
172
- #define NK_TARGET_POWER_ 0
178
+ #define NK_TARGET_POWER64_ 0
173
179
  #endif // defined(__powerpc64__) || defined(__ppc64__) || defined(_ARCH_PPC64)
174
- #endif // !defined(NK_TARGET_POWER_)
180
+ #endif // !defined(NK_TARGET_POWER64_)
175
181
 
176
182
  // Compiling for WASM: NK_TARGET_WASM_
177
183
  #if !defined(NK_TARGET_WASM_)
@@ -203,7 +209,7 @@
203
209
  #endif // !defined(NK_TARGET_V128RELAXED) || ...
204
210
 
205
211
  // Compiling for RISC-V Vector: NK_TARGET_RVV
206
- #if !defined(NK_TARGET_RVV) || (NK_TARGET_RVV && !NK_TARGET_RISCV_)
212
+ #if !defined(NK_TARGET_RVV) || (NK_TARGET_RVV && !NK_TARGET_RISCV64_)
207
213
  #if defined(__riscv_v) && (__riscv_v >= 1000000)
208
214
  #define NK_TARGET_RVV 1
209
215
  #else
@@ -248,7 +254,7 @@
248
254
  // Compiling for LoongArch LASX (256-bit SIMD): NK_TARGET_LOONGSONASX
249
255
  // LASX provides 32 × 256-bit vector registers, widening integer multiply-accumulate,
250
256
  // and f32-to-f64 conversion (xvfcvtl_d_s / xvfcvth_d_s) but no widening FMA.
251
- #if !defined(NK_TARGET_LOONGSONASX) || (NK_TARGET_LOONGSONASX && !NK_TARGET_LOONGARCH_)
257
+ #if !defined(NK_TARGET_LOONGSONASX) || (NK_TARGET_LOONGSONASX && !NK_TARGET_LOONGARCH64_)
252
258
  #if defined(__loongarch_asx)
253
259
  #define NK_TARGET_LOONGSONASX 1
254
260
  #else
@@ -261,7 +267,7 @@
261
267
  // VSX provides 64 × 128-bit registers, FMA (vec_madd), vec_msum (multiply-sum), hardware f16
262
268
  // conversion (vec_extract_fp32_from_shorth/l), length-limited loads (vec_xl_len), per-byte
263
269
  // popcount (vec_popcnt), and vec_cmpne. Requires POWER9 (ISA 3.0) or newer.
264
- #if !defined(NK_TARGET_POWERVSX) || (NK_TARGET_POWERVSX && !NK_TARGET_POWER_)
270
+ #if !defined(NK_TARGET_POWERVSX) || (NK_TARGET_POWERVSX && !NK_TARGET_POWER64_)
265
271
  #if defined(__VSX__) && defined(__POWER9_VECTOR__)
266
272
  #define NK_TARGET_POWERVSX 1
267
273
  #else
@@ -270,19 +276,20 @@
270
276
  #endif // defined(__VSX__)
271
277
  #endif // !defined(NK_TARGET_POWERVSX) || ...
272
278
 
273
- // Compiling for Arm: NK_TARGET_NEON
274
- #if !defined(NK_TARGET_NEON) || (NK_TARGET_NEON && !NK_TARGET_ARM_)
275
- #if defined(__ARM_NEON) || (defined(_MSC_VER) && defined(_M_ARM64))
279
+ // Compiling for Arm: NK_TARGET_NEON (AArch64 only, AArch32 NEON is not supported)
280
+ #if !defined(NK_TARGET_NEON) || (NK_TARGET_NEON && !NK_TARGET_ARM64_)
281
+ #if (defined(__ARM_NEON) && defined(__aarch64__)) || (defined(_MSC_VER) && defined(_M_ARM64))
276
282
  #define NK_TARGET_NEON 1
277
283
  #else
278
284
  #undef NK_TARGET_NEON
279
285
  #define NK_TARGET_NEON 0
280
- #endif // defined(__ARM_NEON) || ...
286
+ #endif // (defined(__ARM_NEON) && defined(__aarch64__)) || ...
281
287
  #endif // !defined(NK_TARGET_NEON) || ...
282
288
 
283
- // Compiling for Arm: NK_TARGET_NEONSDOT (FEAT_DotProd, optional from ARMv8.1, mandatory at ARMv8.4 with AdvSIMD)
284
- #if !defined(NK_TARGET_NEONSDOT) || (NK_TARGET_NEONSDOT && !NK_TARGET_ARM_)
285
- #if defined(__ARM_FEATURE_DOTPROD) || (defined(_MSC_VER) && defined(_M_ARM64) && __ARM_ARCH >= 804)
289
+ // Compiling for Arm: NK_TARGET_NEONSDOT (FEAT_DotProd, AArch64 only)
290
+ #if !defined(NK_TARGET_NEONSDOT) || (NK_TARGET_NEONSDOT && !NK_TARGET_ARM64_)
291
+ #if (defined(__ARM_FEATURE_DOTPROD) && defined(__aarch64__)) || \
292
+ (defined(_MSC_VER) && defined(_M_ARM64) && __ARM_ARCH >= 804)
286
293
  #define NK_TARGET_NEONSDOT 1
287
294
  #else
288
295
  #undef NK_TARGET_NEONSDOT
@@ -290,9 +297,10 @@
290
297
  #endif
291
298
  #endif // !defined(NK_TARGET_NEONSDOT) || ...
292
299
 
293
- // Compiling for Arm: NK_TARGET_NEONHALF (FEAT_FP16, optional from ARMv8.2, mandatory at ARMv9.0 with AdvSIMD)
294
- #if !defined(NK_TARGET_NEONHALF) || (NK_TARGET_NEONHALF && !NK_TARGET_ARM_)
295
- #if defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) || (defined(_MSC_VER) && defined(_M_ARM64) && __ARM_ARCH >= 802)
300
+ // Compiling for Arm: NK_TARGET_NEONHALF (FEAT_FP16, AArch64 only)
301
+ #if !defined(NK_TARGET_NEONHALF) || (NK_TARGET_NEONHALF && !NK_TARGET_ARM64_)
302
+ #if (defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) && defined(__aarch64__)) || \
303
+ (defined(_MSC_VER) && defined(_M_ARM64) && __ARM_ARCH >= 802)
296
304
  #define NK_TARGET_NEONHALF 1
297
305
  #else
298
306
  #undef NK_TARGET_NEONHALF
@@ -300,9 +308,10 @@
300
308
  #endif
301
309
  #endif // !defined(NK_TARGET_NEONHALF) || ...
302
310
 
303
- // Compiling for Arm: NK_TARGET_NEONFHM (FEAT_FHM, optional from ARMv8.1, mandatory at ARMv8.4 with FP16)
304
- #if !defined(NK_TARGET_NEONFHM) || (NK_TARGET_NEONFHM && !NK_TARGET_ARM_)
305
- #if defined(__ARM_FEATURE_FP16_FML) || (defined(_MSC_VER) && defined(_M_ARM64) && __ARM_ARCH >= 804)
311
+ // Compiling for Arm: NK_TARGET_NEONFHM (FEAT_FHM, AArch64 only)
312
+ #if !defined(NK_TARGET_NEONFHM) || (NK_TARGET_NEONFHM && !NK_TARGET_ARM64_)
313
+ #if (defined(__ARM_FEATURE_FP16_FML) && defined(__aarch64__)) || \
314
+ (defined(_MSC_VER) && defined(_M_ARM64) && __ARM_ARCH >= 804)
306
315
  #define NK_TARGET_NEONFHM 1
307
316
  #else
308
317
  #undef NK_TARGET_NEONFHM
@@ -310,9 +319,10 @@
310
319
  #endif
311
320
  #endif // !defined(NK_TARGET_NEONFHM) || ...
312
321
 
313
- // Compiling for Arm: NK_TARGET_NEONBFDOT (FEAT_BF16, optional from ARMv8.2, mandatory at ARMv8.6 with FP)
314
- #if !defined(NK_TARGET_NEONBFDOT) || (NK_TARGET_NEONBFDOT && !NK_TARGET_ARM_)
315
- #if defined(__ARM_FEATURE_BF16_VECTOR_ARITHMETIC) || (defined(_MSC_VER) && defined(_M_ARM64) && __ARM_ARCH >= 806)
322
+ // Compiling for Arm: NK_TARGET_NEONBFDOT (FEAT_BF16, AArch64 only)
323
+ #if !defined(NK_TARGET_NEONBFDOT) || (NK_TARGET_NEONBFDOT && !NK_TARGET_ARM64_)
324
+ #if (defined(__ARM_FEATURE_BF16_VECTOR_ARITHMETIC) && defined(__aarch64__)) || \
325
+ (defined(_MSC_VER) && defined(_M_ARM64) && __ARM_ARCH >= 806)
316
326
  #define NK_TARGET_NEONBFDOT 1
317
327
  #else
318
328
  #undef NK_TARGET_NEONBFDOT
@@ -323,8 +333,8 @@
323
333
  // Compiling for Arm: NK_TARGET_NEONFP8 (NEON FP8 extensions, FEAT_FP8DOT4)
324
334
  // ACLE macro __ARM_FEATURE_FP8DOT4 defined by GCC 15+ and Clang 21+ when +fp8dot4 is enabled.
325
335
  // Older compilers lack mfloat8x16_t and the fp8dot4 target attribute entirely.
326
- #if !defined(NK_TARGET_NEONFP8) || (NK_TARGET_NEONFP8 && !NK_TARGET_ARM_)
327
- #if defined(__ARM_FEATURE_FP8DOT4)
336
+ #if !defined(NK_TARGET_NEONFP8) || (NK_TARGET_NEONFP8 && !NK_TARGET_ARM64_)
337
+ #if defined(__ARM_FEATURE_FP8DOT4) && defined(__aarch64__)
328
338
  #define NK_TARGET_NEONFP8 1
329
339
  #else
330
340
  #undef NK_TARGET_NEONFP8
@@ -333,7 +343,7 @@
333
343
  #endif // !defined(NK_TARGET_NEONFP8) || ...
334
344
 
335
345
  // Compiling for Arm: NK_TARGET_SVE
336
- #if !defined(NK_TARGET_SVE) || (NK_TARGET_SVE && !NK_TARGET_ARM_)
346
+ #if !defined(NK_TARGET_SVE) || (NK_TARGET_SVE && !NK_TARGET_ARM64_)
337
347
  #if defined(__ARM_FEATURE_SVE)
338
348
  #define NK_TARGET_SVE 1
339
349
  #else
@@ -343,7 +353,7 @@
343
353
  #endif // !defined(NK_TARGET_SVE) || ...
344
354
 
345
355
  // Compiling for Arm: NK_TARGET_SVESDOT
346
- #if !defined(NK_TARGET_SVESDOT) || (NK_TARGET_SVESDOT && !NK_TARGET_ARM_)
356
+ #if !defined(NK_TARGET_SVESDOT) || (NK_TARGET_SVESDOT && !NK_TARGET_ARM64_)
347
357
  #if defined(__ARM_FEATURE_SVE)
348
358
  #define NK_TARGET_SVESDOT 1
349
359
  #else
@@ -353,7 +363,7 @@
353
363
  #endif // !defined(NK_TARGET_SVESDOT) || ...
354
364
 
355
365
  // Compiling for Arm: NK_TARGET_SVEHALF
356
- #if !defined(NK_TARGET_SVEHALF) || (NK_TARGET_SVEHALF && !NK_TARGET_ARM_)
366
+ #if !defined(NK_TARGET_SVEHALF) || (NK_TARGET_SVEHALF && !NK_TARGET_ARM64_)
357
367
  #if defined(__ARM_FEATURE_SVE)
358
368
  #define NK_TARGET_SVEHALF 1
359
369
  #else
@@ -363,7 +373,7 @@
363
373
  #endif // !defined(NK_TARGET_SVEHALF) || ...
364
374
 
365
375
  // Compiling for Arm: NK_TARGET_SVEBFDOT
366
- #if !defined(NK_TARGET_SVEBFDOT) || (NK_TARGET_SVEBFDOT && !NK_TARGET_ARM_)
376
+ #if !defined(NK_TARGET_SVEBFDOT) || (NK_TARGET_SVEBFDOT && !NK_TARGET_ARM64_)
367
377
  #if defined(__ARM_FEATURE_SVE)
368
378
  #define NK_TARGET_SVEBFDOT 1
369
379
  #else
@@ -373,7 +383,7 @@
373
383
  #endif // !defined(NK_TARGET_SVEBFDOT) || ...
374
384
 
375
385
  // Compiling for Arm: NK_TARGET_SVE2
376
- #if !defined(NK_TARGET_SVE2) || (NK_TARGET_SVE2 && !NK_TARGET_ARM_)
386
+ #if !defined(NK_TARGET_SVE2) || (NK_TARGET_SVE2 && !NK_TARGET_ARM64_)
377
387
  #if defined(__ARM_FEATURE_SVE2)
378
388
  #define NK_TARGET_SVE2 1
379
389
  #else
@@ -383,13 +393,13 @@
383
393
  #endif // !defined(NK_TARGET_SVE2) || ...
384
394
 
385
395
  // Compiling for Arm: NK_TARGET_SVE2P1
386
- #if !defined(NK_TARGET_SVE2P1) || (NK_TARGET_SVE2P1 && !NK_TARGET_ARM_)
396
+ #if !defined(NK_TARGET_SVE2P1) || (NK_TARGET_SVE2P1 && !NK_TARGET_ARM64_)
387
397
  #undef NK_TARGET_SVE2P1
388
398
  #define NK_TARGET_SVE2P1 0
389
399
  #endif // !defined(NK_TARGET_SVE2P1) || ...
390
400
 
391
401
  // Compiling for Arm: NK_TARGET_SME (Scalable Matrix Extension)
392
- #if !defined(NK_TARGET_SME) || (NK_TARGET_SME && !NK_TARGET_ARM_)
402
+ #if !defined(NK_TARGET_SME) || (NK_TARGET_SME && !NK_TARGET_ARM64_)
393
403
  #if defined(__ARM_FEATURE_SME)
394
404
  #define NK_TARGET_SME 1
395
405
  #else
@@ -398,7 +408,7 @@
398
408
  #endif // defined(__ARM_FEATURE_SME)
399
409
  #endif // !defined(NK_TARGET_SME) || ...
400
410
 
401
- #if !defined(NK_TARGET_SME2) || (NK_TARGET_SME2 && !NK_TARGET_ARM_)
411
+ #if !defined(NK_TARGET_SME2) || (NK_TARGET_SME2 && !NK_TARGET_ARM64_)
402
412
  #if defined(__ARM_FEATURE_SME2)
403
413
  #define NK_TARGET_SME2 1
404
414
  #else
@@ -409,7 +419,7 @@
409
419
 
410
420
  // Compiling for Arm: NK_TARGET_SME2P1 (FEAT_SME2p1)
411
421
  // ACLE macro: __ARM_FEATURE_SME2p1 (note lowercase 'p')
412
- #if !defined(NK_TARGET_SME2P1) || (NK_TARGET_SME2P1 && !NK_TARGET_ARM_)
422
+ #if !defined(NK_TARGET_SME2P1) || (NK_TARGET_SME2P1 && !NK_TARGET_ARM64_)
413
423
  #if defined(__ARM_FEATURE_SME2p1)
414
424
  #define NK_TARGET_SME2P1 1
415
425
  #else
@@ -420,8 +430,8 @@
420
430
 
421
431
  // AppleClang 17 exposes SME sub-features through `arm_sme.h` builtin aliases,
422
432
  // not dedicated `__ARM_FEATURE_*` predefines for every matrix subtype.
423
- #if !defined(NK_TARGET_SMEF64) || (NK_TARGET_SMEF64 && !NK_TARGET_ARM_)
424
- #if defined(__ARM_FEATURE_SME_F64F64) || (defined(__has_builtin) && __has_builtin(__builtin_sme_svmopa_za64_f64_m))
433
+ #if !defined(NK_TARGET_SMEF64) || (NK_TARGET_SMEF64 && !NK_TARGET_ARM64_)
434
+ #if defined(__ARM_FEATURE_SME_F64F64) || nk_has_builtin_(__builtin_sme_svmopa_za64_f64_m)
425
435
  #define NK_TARGET_SMEF64 1
426
436
  #else
427
437
  #undef NK_TARGET_SMEF64
@@ -429,44 +439,44 @@
429
439
  #endif // defined(__ARM_FEATURE_SME_F64F64) || ...
430
440
  #endif // !defined(NK_TARGET_SMEF64) || ...
431
441
 
432
- #if !defined(NK_TARGET_SMEBI32) || (NK_TARGET_SMEBI32 && !NK_TARGET_ARM_)
433
- #if defined(__has_builtin) && __has_builtin(__builtin_sme_svbmopa_za32_u32_m)
442
+ #if !defined(NK_TARGET_SMEBI32) || (NK_TARGET_SMEBI32 && !NK_TARGET_ARM64_)
443
+ #if nk_has_builtin_(__builtin_sme_svbmopa_za32_u32_m)
434
444
  #define NK_TARGET_SMEBI32 1
435
445
  #else
436
446
  #undef NK_TARGET_SMEBI32
437
447
  #define NK_TARGET_SMEBI32 0
438
- #endif // defined(__has_builtin) && __has_builtin(__builtin_sme_svbmopa_za32_u32_m)
448
+ #endif // nk_has_builtin_(__builtin_sme_svbmopa_za32_u32_m)
439
449
  #endif // !defined(NK_TARGET_SMEBI32) || ...
440
450
 
441
- #if !defined(NK_TARGET_SMEHALF) || (NK_TARGET_SMEHALF && !NK_TARGET_ARM_)
442
- #if defined(__ARM_FEATURE_SME_F16F16) || (defined(__has_builtin) && __has_builtin(__builtin_sme_svmopa_za32_f16_m))
451
+ #if !defined(NK_TARGET_SMEHALF) || (NK_TARGET_SMEHALF && !NK_TARGET_ARM64_)
452
+ #if defined(__ARM_FEATURE_SME_F16F16) || nk_has_builtin_(__builtin_sme_svmopa_za32_f16_m)
443
453
  #define NK_TARGET_SMEHALF 1
444
454
  #else
445
455
  #undef NK_TARGET_SMEHALF
446
456
  #define NK_TARGET_SMEHALF 0
447
- #endif // defined(__has_builtin) && __has_builtin(__builtin_sme_svmopa_za32_f16_m)
457
+ #endif // nk_has_builtin_(__builtin_sme_svmopa_za32_f16_m)
448
458
  #endif // !defined(NK_TARGET_SMEHALF) || ...
449
459
 
450
- #if !defined(NK_TARGET_SMEBF16) || (NK_TARGET_SMEBF16 && !NK_TARGET_ARM_)
451
- #if defined(__has_builtin) && __has_builtin(__builtin_sme_svmopa_za32_bf16_m)
460
+ #if !defined(NK_TARGET_SMEBF16) || (NK_TARGET_SMEBF16 && !NK_TARGET_ARM64_)
461
+ #if nk_has_builtin_(__builtin_sme_svmopa_za32_bf16_m)
452
462
  #define NK_TARGET_SMEBF16 1
453
463
  #else
454
464
  #undef NK_TARGET_SMEBF16
455
465
  #define NK_TARGET_SMEBF16 0
456
- #endif // defined(__has_builtin) && __has_builtin(__builtin_sme_svmopa_za32_bf16_m)
466
+ #endif // nk_has_builtin_(__builtin_sme_svmopa_za32_bf16_m)
457
467
  #endif // !defined(NK_TARGET_SMEBF16) || ...
458
468
 
459
- #if !defined(NK_TARGET_SMELUT2) || (NK_TARGET_SMELUT2 && !NK_TARGET_ARM_)
460
- #if defined(__has_builtin) && __has_builtin(__builtin_sme_svluti2_lane_zt_u8)
469
+ #if !defined(NK_TARGET_SMELUT2) || (NK_TARGET_SMELUT2 && !NK_TARGET_ARM64_)
470
+ #if nk_has_builtin_(__builtin_sme_svluti2_lane_zt_u8)
461
471
  #define NK_TARGET_SMELUT2 1
462
472
  #else
463
473
  #undef NK_TARGET_SMELUT2
464
474
  #define NK_TARGET_SMELUT2 0
465
- #endif // defined(__has_builtin) && __has_builtin(__builtin_sme_svluti2_lane_zt_u8)
475
+ #endif // nk_has_builtin_(__builtin_sme_svluti2_lane_zt_u8)
466
476
  #endif // !defined(NK_TARGET_SMELUT2) || ...
467
477
 
468
478
  // Compiling for Arm: NK_TARGET_SMEFA64 (FEAT_SME_FA64, full SVE2 in streaming mode)
469
- #if !defined(NK_TARGET_SMEFA64) || (NK_TARGET_SMEFA64 && !NK_TARGET_ARM_)
479
+ #if !defined(NK_TARGET_SMEFA64) || (NK_TARGET_SMEFA64 && !NK_TARGET_ARM64_)
470
480
  #if defined(__ARM_FEATURE_SME_FA64)
471
481
  #define NK_TARGET_SMEFA64 1
472
482
  #else
@@ -491,7 +501,7 @@
491
501
  // - _MSC_VER >= 1900 (VS 2015+): AVX2/FMA/F16C (Haswell)
492
502
  // - _MSC_VER >= 1920 (VS 2019+): AVX-512 base (Skylake, Icelake), AVX-VNNI (Alder)
493
503
  // - _MSC_VER >= 1944 (VS 2022 17.14+): BF16, FP16, VP2INTERSECT, VNNI-INT8 (Sierra), AMX
494
- #if !defined(NK_TARGET_HASWELL) || (NK_TARGET_HASWELL && !NK_TARGET_X86_)
504
+ #if !defined(NK_TARGET_HASWELL) || (NK_TARGET_HASWELL && !NK_TARGET_X8664_)
495
505
  #if (defined(__AVX2__) && defined(__FMA__) && defined(__F16C__)) || (defined(_MSC_VER) && _MSC_VER >= 1900)
496
506
  #define NK_TARGET_HASWELL 1
497
507
  #else
@@ -507,7 +517,7 @@
507
517
  // gcc-12 -march=sapphirerapids -dM -E - < /dev/null | egrep "SSE|AVX" | sort
508
518
  // On Arm machines you may want to check for other flags:
509
519
  // gcc-12 -march=native -dM -E - < /dev/null | egrep "NEON|SVE|FP16|FMA" | sort
510
- #if !defined(NK_TARGET_SKYLAKE) || (NK_TARGET_SKYLAKE && !NK_TARGET_X86_)
520
+ #if !defined(NK_TARGET_SKYLAKE) || (NK_TARGET_SKYLAKE && !NK_TARGET_X8664_)
511
521
  #if (defined(__AVX512F__) && defined(__AVX512CD__) && defined(__AVX512VL__) && defined(__AVX512DQ__) && \
512
522
  defined(__AVX512BW__)) || \
513
523
  (defined(_MSC_VER) && _MSC_VER >= 1920)
@@ -518,7 +528,7 @@
518
528
  #endif
519
529
  #endif // !defined(NK_TARGET_SKYLAKE) || ...
520
530
 
521
- #if !defined(NK_TARGET_ICELAKE) || (NK_TARGET_ICELAKE && !NK_TARGET_X86_)
531
+ #if !defined(NK_TARGET_ICELAKE) || (NK_TARGET_ICELAKE && !NK_TARGET_X8664_)
522
532
  #if (defined(__AVX512VNNI__) && defined(__AVX512IFMA__) && defined(__AVX512BITALG__) && defined(__AVX512VBMI__) && \
523
533
  defined(__AVX512VBMI2__) && defined(__AVX512VPOPCNTDQ__)) || \
524
534
  (defined(_MSC_VER) && _MSC_VER >= 1920)
@@ -529,7 +539,7 @@
529
539
  #endif
530
540
  #endif // !defined(NK_TARGET_ICELAKE) || ...
531
541
 
532
- #if !defined(NK_TARGET_GENOA) || (NK_TARGET_GENOA && !NK_TARGET_X86_)
542
+ #if !defined(NK_TARGET_GENOA) || (NK_TARGET_GENOA && !NK_TARGET_X8664_)
533
543
  #if defined(__AVX512BF16__) || (defined(_MSC_VER) && _MSC_VER >= 1944)
534
544
  #define NK_TARGET_GENOA 1
535
545
  #else
@@ -542,7 +552,7 @@
542
552
  // GCC 14+: defines __AVX10_2__ with -mavx10.2-512
543
553
  // Clang 19+: defines __AVX10_2__ with -mavx10.2-512
544
554
  // MSVC: defines __AVX10_VER__ >= 2 with /arch:AVX10.2 (VS 2026+, not yet released)
545
- #if !defined(NK_TARGET_DIAMOND) || (NK_TARGET_DIAMOND && !NK_TARGET_X86_)
555
+ #if !defined(NK_TARGET_DIAMOND) || (NK_TARGET_DIAMOND && !NK_TARGET_X8664_)
546
556
  #if defined(__AVX10_2__) || (defined(__AVX10_VER__) && __AVX10_VER__ >= 2)
547
557
  #define NK_TARGET_DIAMOND 1
548
558
  #else
@@ -551,7 +561,7 @@
551
561
  #endif // defined(__AVX10_2__) || ...
552
562
  #endif // !defined(NK_TARGET_DIAMOND) || ...
553
563
 
554
- #if !defined(NK_TARGET_SAPPHIRE) || (NK_TARGET_SAPPHIRE && !NK_TARGET_X86_)
564
+ #if !defined(NK_TARGET_SAPPHIRE) || (NK_TARGET_SAPPHIRE && !NK_TARGET_X8664_)
555
565
  #if defined(__AVX512FP16__) || (defined(_MSC_VER) && _MSC_VER >= 1944)
556
566
  #define NK_TARGET_SAPPHIRE 1
557
567
  #else
@@ -560,7 +570,7 @@
560
570
  #endif
561
571
  #endif // !defined(NK_TARGET_SAPPHIRE) || ...
562
572
 
563
- #if !defined(NK_TARGET_SAPPHIREAMX) || (NK_TARGET_SAPPHIREAMX && !NK_TARGET_X86_)
573
+ #if !defined(NK_TARGET_SAPPHIREAMX) || (NK_TARGET_SAPPHIREAMX && !NK_TARGET_X8664_)
564
574
  #if (defined(__AMX_TILE__) && defined(__AMX_BF16__) && defined(__AMX_INT8__)) || (defined(_MSC_VER) && _MSC_VER >= 1944)
565
575
  #define NK_TARGET_SAPPHIREAMX 1
566
576
  #else
@@ -569,7 +579,7 @@
569
579
  #endif
570
580
  #endif // !defined(NK_TARGET_SAPPHIREAMX) || ...
571
581
 
572
- #if !defined(NK_TARGET_GRANITEAMX) || (NK_TARGET_GRANITEAMX && !NK_TARGET_X86_)
582
+ #if !defined(NK_TARGET_GRANITEAMX) || (NK_TARGET_GRANITEAMX && !NK_TARGET_X8664_)
573
583
  #if (defined(__AMX_TILE__) && defined(__AMX_FP16__)) || (defined(_MSC_VER) && _MSC_VER >= 1944)
574
584
  #define NK_TARGET_GRANITEAMX 1
575
585
  #else
@@ -578,7 +588,7 @@
578
588
  #endif
579
589
  #endif // !defined(NK_TARGET_GRANITEAMX) || ...
580
590
 
581
- #if !defined(NK_TARGET_TURIN) || (NK_TARGET_TURIN && !NK_TARGET_X86_)
591
+ #if !defined(NK_TARGET_TURIN) || (NK_TARGET_TURIN && !NK_TARGET_X8664_)
582
592
  #if defined(__AVX512VP2INTERSECT__) || (defined(_MSC_VER) && _MSC_VER >= 1944)
583
593
  #define NK_TARGET_TURIN 1
584
594
  #else
@@ -587,7 +597,7 @@
587
597
  #endif
588
598
  #endif // !defined(NK_TARGET_TURIN) || ...
589
599
 
590
- #if !defined(NK_TARGET_ALDER) || (NK_TARGET_ALDER && !NK_TARGET_X86_)
600
+ #if !defined(NK_TARGET_ALDER) || (NK_TARGET_ALDER && !NK_TARGET_X8664_)
591
601
  #if defined(__AVXVNNI__) || (defined(_MSC_VER) && _MSC_VER >= 1920)
592
602
  #define NK_TARGET_ALDER 1
593
603
  #else
@@ -596,7 +606,7 @@
596
606
  #endif
597
607
  #endif // !defined(NK_TARGET_ALDER) || ...
598
608
 
599
- #if !defined(NK_TARGET_SIERRA) || (NK_TARGET_SIERRA && !NK_TARGET_X86_)
609
+ #if !defined(NK_TARGET_SIERRA) || (NK_TARGET_SIERRA && !NK_TARGET_X8664_)
600
610
  #if defined(__AVXVNNIINT8__) || (defined(_MSC_VER) && _MSC_VER >= 1944)
601
611
  #define NK_TARGET_SIERRA 1
602
612
  #else
@@ -671,7 +681,7 @@
671
681
  * NK_STREAMING_ marks functions that require streaming SVE mode (e.g. FCVTLT).
672
682
  * NK_STREAMING_COMPATIBLE_ marks helpers callable from both streaming and non-streaming mode.
673
683
  */
674
- #if NK_TARGET_ARM_ && NK_TARGET_SME
684
+ #if NK_TARGET_ARM64_ && NK_TARGET_SME
675
685
  #define NK_STREAMING_ __arm_streaming
676
686
  #define NK_STREAMING_COMPATIBLE_ __arm_streaming_compatible
677
687
  #else
@@ -684,7 +694,7 @@
684
694
  * MSVC typedefs `__m512bh`, `__m512h`, `__m256bh` as aliases for `__m512i`/`__m256i`,
685
695
  * but rejects C-style casts between them. GCC/Clang define them as distinct types.
686
696
  */
687
- #if NK_TARGET_X86_
697
+ #if NK_TARGET_X8664_
688
698
  #if defined(_MSC_VER)
689
699
  #define nk_m512bh_from_m512i_(x) (x)
690
700
  #define nk_m512h_from_m512i_(x) (x)
@@ -804,7 +814,7 @@ typedef unsigned int nk_u32_t;
804
814
  /* On LP64 targets (Linux ARM64, RISC-V 64), `long` and `long long` are both 64-bit but distinct types.
805
815
  * NEON/RVV intrinsics on Linux expect `long*`, while Apple's NEON intrinsics expect `long long*`.
806
816
  * Windows uses LLP64 where `long` is 32-bit, so it must use `long long` for 64-bit types. */
807
- #if ((NK_TARGET_ARM_ && !defined(NK_DEFINED_APPLE_)) || NK_TARGET_RISCV_) && !defined(NK_DEFINED_WINDOWS_)
817
+ #if ((NK_TARGET_ARM64_ && !defined(NK_DEFINED_APPLE_)) || NK_TARGET_RISCV64_) && !defined(NK_DEFINED_WINDOWS_)
808
818
  /** @brief Signed 64-bit integer. Range: [−2⁶³, +2⁶³−1]. */
809
819
  typedef signed long nk_i64_t;
810
820
  /** @brief Unsigned 64-bit integer. Range: [0, 2⁶⁴−1]. */
@@ -821,7 +831,7 @@ typedef float nk_f32_t;
821
831
  /** @brief Double-precision (64-bit) IEEE 754 float. sign(1) + exponent(11) + mantissa(52), bias=1023. */
822
832
  typedef double nk_f64_t;
823
833
 
824
- #if NK_TARGET_X86_ || NK_TARGET_ARM_ || NK_TARGET_RISCV_ || NK_TARGET_POWER_ || NK_TARGET_LOONGARCH_
834
+ #if NK_TARGET_X8664_ || NK_TARGET_ARM64_ || NK_TARGET_RISCV64_ || NK_TARGET_POWER64_ || NK_TARGET_LOONGARCH64_
825
835
  #define NK_IS_64BIT_ 1
826
836
  #else
827
837
  #define NK_IS_64BIT_ 0
@@ -1088,7 +1098,7 @@ typedef unsigned short nk_bf16_t;
1088
1098
  * Some of those are defined as aliases, so we use `#define` preprocessor
1089
1099
  * directives instead of `typedef` to avoid errors.
1090
1100
  */
1091
- #if NK_TARGET_ARM_
1101
+ #if NK_TARGET_ARM64_
1092
1102
  #if defined(_MSC_VER)
1093
1103
  #define nk_f16_for_arm_simd_t nk_f16_t
1094
1104
  #define nk_bf16_for_arm_simd_t nk_bf16_t
@@ -1102,7 +1112,7 @@ typedef unsigned short nk_bf16_t;
1102
1112
  * RISC-V Vector (RVV) intrinsics use `_Float16` for half-precision floats.
1103
1113
  * This is the standard C23 type, also available in GCC/Clang with RVV extensions.
1104
1114
  */
1105
- #if NK_TARGET_RISCV_
1115
+ #if NK_TARGET_RISCV64_
1106
1116
  #define nk_f16_for_rvv_intrinsics_t _Float16
1107
1117
  #endif
1108
1118
 
@@ -1237,6 +1247,8 @@ typedef union NK_MAY_ALIAS_ nk_b128_vec_t {
1237
1247
  int32x4_t i32x4;
1238
1248
  int64x2_t i64x2;
1239
1249
  float32x4_t f32x4;
1250
+ #endif
1251
+ #if NK_TARGET_NEON && NK_TARGET_ARM64_ // double-precision NEON requires AArch64
1240
1252
  float64x2_t f64x2;
1241
1253
  #endif
1242
1254
  #if NK_TARGET_NEONHALF
@@ -1294,6 +1306,8 @@ typedef union NK_MAY_ALIAS_ nk_b256_vec_t {
1294
1306
  int32x4_t i32x4s[2];
1295
1307
  int64x2_t i64x2s[2];
1296
1308
  float32x4_t f32x4s[2];
1309
+ #endif
1310
+ #if NK_TARGET_NEON && NK_TARGET_ARM64_ // double-precision NEON requires AArch64
1297
1311
  float64x2_t f64x2s[2];
1298
1312
  #endif
1299
1313
  #if NK_TARGET_POWERVSX
@@ -1588,7 +1602,7 @@ NK_INTERNAL int nk_bf16_is_nan_(nk_bf16_t x) {
1588
1602
  * SMSTART SM / SMSTOP SM so the calling function's ABI is unchanged.
1589
1603
  * Inside `__arm_locally_streaming` functions the plain `svcntXX()` intrinsics are fine.
1590
1604
  */
1591
- #if NK_TARGET_ARM_ && NK_TARGET_SME
1605
+ #if NK_TARGET_ARM64_ && NK_TARGET_SME
1592
1606
  /** @brief Streaming SVL byte-element count (SVL/8) via SMSTART SM bracket. */
1593
1607
  NK_INTERNAL nk_size_t nk_sme_cntb_(void) {
1594
1608
  nk_u64_t r;
package/numkong.gypi CHANGED
@@ -79,6 +79,9 @@
79
79
  "ForcedIncludeFiles": [
80
80
  "<!(node -e \"console.log(require('path').resolve('<(numkong_root)','nk_probes.h'))\")",
81
81
  ],
82
+ "AdditionalOptions": [
83
+ "/Zc:preprocessor"
84
+ ],
82
85
  },
83
86
  },
84
87
  "conditions": [