numkong 7.4.2 → 7.4.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (172) hide show
  1. package/README.md +84 -84
  2. package/c/numkong.c +1 -1
  3. package/include/numkong/attention/sapphireamx.h +2 -2
  4. package/include/numkong/attention/sme.h +2 -2
  5. package/include/numkong/capabilities.h +47 -47
  6. package/include/numkong/cast/diamond.h +2 -2
  7. package/include/numkong/cast/haswell.h +2 -2
  8. package/include/numkong/cast/icelake.h +2 -2
  9. package/include/numkong/cast/loongsonasx.h +2 -2
  10. package/include/numkong/cast/neon.h +2 -2
  11. package/include/numkong/cast/powervsx.h +2 -2
  12. package/include/numkong/cast/rvv.h +2 -2
  13. package/include/numkong/cast/sapphire.h +2 -2
  14. package/include/numkong/cast/skylake.h +2 -2
  15. package/include/numkong/curved/genoa.h +2 -2
  16. package/include/numkong/curved/haswell.h +2 -2
  17. package/include/numkong/curved/neon.h +2 -2
  18. package/include/numkong/curved/neonbfdot.h +2 -2
  19. package/include/numkong/curved/rvv.h +2 -2
  20. package/include/numkong/curved/skylake.h +2 -2
  21. package/include/numkong/curved/smef64.h +2 -2
  22. package/include/numkong/dot/alder.h +2 -2
  23. package/include/numkong/dot/diamond.h +2 -2
  24. package/include/numkong/dot/genoa.h +2 -2
  25. package/include/numkong/dot/haswell.h +2 -2
  26. package/include/numkong/dot/icelake.h +2 -2
  27. package/include/numkong/dot/loongsonasx.h +2 -2
  28. package/include/numkong/dot/neon.h +2 -2
  29. package/include/numkong/dot/neonbfdot.h +2 -2
  30. package/include/numkong/dot/neonfhm.h +2 -2
  31. package/include/numkong/dot/neonfp8.h +2 -2
  32. package/include/numkong/dot/neonsdot.h +2 -2
  33. package/include/numkong/dot/rvv.h +2 -2
  34. package/include/numkong/dot/rvvbb.h +2 -2
  35. package/include/numkong/dot/rvvbf16.h +2 -2
  36. package/include/numkong/dot/rvvhalf.h +2 -2
  37. package/include/numkong/dot/sapphire.h +2 -2
  38. package/include/numkong/dot/sierra.h +2 -2
  39. package/include/numkong/dot/skylake.h +2 -2
  40. package/include/numkong/dot/sve.h +2 -2
  41. package/include/numkong/dot/svebfdot.h +2 -2
  42. package/include/numkong/dot/svehalf.h +2 -2
  43. package/include/numkong/dot/svesdot.h +2 -2
  44. package/include/numkong/dots/alder.h +2 -2
  45. package/include/numkong/dots/diamond.h +2 -2
  46. package/include/numkong/dots/genoa.h +2 -2
  47. package/include/numkong/dots/haswell.h +2 -2
  48. package/include/numkong/dots/icelake.h +2 -2
  49. package/include/numkong/dots/loongsonasx.h +2 -2
  50. package/include/numkong/dots/neon.h +2 -2
  51. package/include/numkong/dots/neonbfdot.h +2 -2
  52. package/include/numkong/dots/neonfhm.h +2 -2
  53. package/include/numkong/dots/neonfp8.h +2 -2
  54. package/include/numkong/dots/neonsdot.h +2 -2
  55. package/include/numkong/dots/powervsx.h +2 -2
  56. package/include/numkong/dots/rvv.h +2 -2
  57. package/include/numkong/dots/sapphireamx.h +2 -2
  58. package/include/numkong/dots/sierra.h +2 -2
  59. package/include/numkong/dots/skylake.h +2 -2
  60. package/include/numkong/dots/sme.h +10 -10
  61. package/include/numkong/dots/smebi32.h +2 -2
  62. package/include/numkong/dots/smef64.h +2 -2
  63. package/include/numkong/dots/smehalf.h +2 -2
  64. package/include/numkong/each/haswell.h +2 -2
  65. package/include/numkong/each/icelake.h +2 -2
  66. package/include/numkong/each/neon.h +2 -2
  67. package/include/numkong/each/neonbfdot.h +2 -2
  68. package/include/numkong/each/neonhalf.h +2 -2
  69. package/include/numkong/each/rvv.h +2 -2
  70. package/include/numkong/each/sapphire.h +2 -2
  71. package/include/numkong/each/skylake.h +2 -2
  72. package/include/numkong/geospatial/haswell.h +2 -2
  73. package/include/numkong/geospatial/neon.h +2 -2
  74. package/include/numkong/geospatial/rvv.h +2 -2
  75. package/include/numkong/geospatial/skylake.h +2 -2
  76. package/include/numkong/maxsim/alder.h +2 -2
  77. package/include/numkong/maxsim/genoa.h +2 -2
  78. package/include/numkong/maxsim/haswell.h +2 -2
  79. package/include/numkong/maxsim/icelake.h +2 -2
  80. package/include/numkong/maxsim/neonsdot.h +2 -2
  81. package/include/numkong/maxsim/sapphireamx.h +2 -2
  82. package/include/numkong/maxsim/sme.h +2 -2
  83. package/include/numkong/mesh/haswell.h +2 -2
  84. package/include/numkong/mesh/neon.h +2 -2
  85. package/include/numkong/mesh/neonbfdot.h +2 -2
  86. package/include/numkong/mesh/rvv.h +2 -2
  87. package/include/numkong/mesh/skylake.h +2 -2
  88. package/include/numkong/numkong.h +1 -1
  89. package/include/numkong/probability/haswell.h +2 -2
  90. package/include/numkong/probability/neon.h +2 -2
  91. package/include/numkong/probability/rvv.h +2 -2
  92. package/include/numkong/probability/skylake.h +2 -2
  93. package/include/numkong/reduce/alder.h +2 -2
  94. package/include/numkong/reduce/genoa.h +2 -2
  95. package/include/numkong/reduce/haswell.h +2 -2
  96. package/include/numkong/reduce/icelake.h +2 -2
  97. package/include/numkong/reduce/neon.h +2 -2
  98. package/include/numkong/reduce/neonbfdot.h +2 -2
  99. package/include/numkong/reduce/neonfhm.h +2 -2
  100. package/include/numkong/reduce/neonsdot.h +2 -2
  101. package/include/numkong/reduce/rvv.h +2 -2
  102. package/include/numkong/reduce/sierra.h +2 -2
  103. package/include/numkong/reduce/skylake.h +2 -2
  104. package/include/numkong/scalar/haswell.h +2 -2
  105. package/include/numkong/scalar/loongsonasx.h +2 -2
  106. package/include/numkong/scalar/neon.h +2 -2
  107. package/include/numkong/scalar/neonhalf.h +2 -2
  108. package/include/numkong/scalar/powervsx.h +2 -2
  109. package/include/numkong/scalar/rvv.h +2 -2
  110. package/include/numkong/scalar/sapphire.h +2 -2
  111. package/include/numkong/set/haswell.h +2 -2
  112. package/include/numkong/set/icelake.h +2 -2
  113. package/include/numkong/set/loongsonasx.h +2 -2
  114. package/include/numkong/set/neon.h +2 -2
  115. package/include/numkong/set/powervsx.h +2 -2
  116. package/include/numkong/set/rvv.h +2 -2
  117. package/include/numkong/set/rvvbb.h +2 -2
  118. package/include/numkong/set/sve.h +2 -2
  119. package/include/numkong/sets/haswell.h +2 -2
  120. package/include/numkong/sets/icelake.h +2 -2
  121. package/include/numkong/sets/loongsonasx.h +2 -2
  122. package/include/numkong/sets/neon.h +2 -2
  123. package/include/numkong/sets/powervsx.h +2 -2
  124. package/include/numkong/sets/smebi32.h +2 -2
  125. package/include/numkong/sparse/icelake.h +2 -2
  126. package/include/numkong/sparse/neon.h +2 -2
  127. package/include/numkong/sparse/sve2.h +2 -2
  128. package/include/numkong/sparse/turin.h +2 -2
  129. package/include/numkong/spatial/alder.h +2 -2
  130. package/include/numkong/spatial/diamond.h +2 -2
  131. package/include/numkong/spatial/genoa.h +2 -2
  132. package/include/numkong/spatial/haswell.h +2 -2
  133. package/include/numkong/spatial/icelake.h +2 -2
  134. package/include/numkong/spatial/loongsonasx.h +2 -2
  135. package/include/numkong/spatial/neon.h +2 -2
  136. package/include/numkong/spatial/neonbfdot.h +2 -2
  137. package/include/numkong/spatial/neonfp8.h +2 -2
  138. package/include/numkong/spatial/neonsdot.h +2 -2
  139. package/include/numkong/spatial/powervsx.h +2 -2
  140. package/include/numkong/spatial/rvv.h +2 -2
  141. package/include/numkong/spatial/rvvbf16.h +2 -2
  142. package/include/numkong/spatial/rvvhalf.h +2 -2
  143. package/include/numkong/spatial/sierra.h +2 -2
  144. package/include/numkong/spatial/skylake.h +2 -2
  145. package/include/numkong/spatial/sve.h +2 -2
  146. package/include/numkong/spatial/svebfdot.h +2 -2
  147. package/include/numkong/spatial/svehalf.h +2 -2
  148. package/include/numkong/spatial/svesdot.h +2 -2
  149. package/include/numkong/spatials/alder.h +2 -2
  150. package/include/numkong/spatials/diamond.h +2 -2
  151. package/include/numkong/spatials/genoa.h +2 -2
  152. package/include/numkong/spatials/haswell.h +2 -2
  153. package/include/numkong/spatials/icelake.h +2 -2
  154. package/include/numkong/spatials/loongsonasx.h +2 -2
  155. package/include/numkong/spatials/neon.h +2 -2
  156. package/include/numkong/spatials/neonbfdot.h +2 -2
  157. package/include/numkong/spatials/neonfhm.h +2 -2
  158. package/include/numkong/spatials/neonfp8.h +2 -2
  159. package/include/numkong/spatials/neonsdot.h +2 -2
  160. package/include/numkong/spatials/powervsx.h +2 -2
  161. package/include/numkong/spatials/rvv.h +2 -2
  162. package/include/numkong/spatials/sapphireamx.h +2 -2
  163. package/include/numkong/spatials/sierra.h +2 -2
  164. package/include/numkong/spatials/skylake.h +2 -2
  165. package/include/numkong/spatials/sme.h +2 -2
  166. package/include/numkong/spatials/smef64.h +2 -2
  167. package/include/numkong/trigonometry/haswell.h +2 -2
  168. package/include/numkong/trigonometry/neon.h +2 -2
  169. package/include/numkong/trigonometry/rvv.h +2 -2
  170. package/include/numkong/trigonometry/skylake.h +2 -2
  171. package/include/numkong/types.h +88 -80
  172. package/package.json +7 -7
@@ -32,7 +32,7 @@
32
32
  #ifndef NK_TRIGONOMETRY_NEON_H
33
33
  #define NK_TRIGONOMETRY_NEON_H
34
34
 
35
- #if NK_TARGET_ARM_
35
+ #if NK_TARGET_ARM64_
36
36
  #if NK_TARGET_NEON
37
37
 
38
38
  #include "numkong/types.h"
@@ -634,5 +634,5 @@ NK_PUBLIC void nk_each_atan_f64_neon(nk_f64_t const *ins, nk_size_t n, nk_f64_t
634
634
  #endif
635
635
 
636
636
  #endif // NK_TARGET_NEON
637
- #endif // NK_TARGET_ARM_
637
+ #endif // NK_TARGET_ARM64_
638
638
  #endif // NK_TRIGONOMETRY_NEON_H
@@ -37,7 +37,7 @@
37
37
  #ifndef NK_TRIGONOMETRY_RVV_H
38
38
  #define NK_TRIGONOMETRY_RVV_H
39
39
 
40
- #if NK_TARGET_RISCV_
40
+ #if NK_TARGET_RISCV64_
41
41
  #if NK_TARGET_RVV
42
42
 
43
43
  #include "numkong/types.h"
@@ -696,5 +696,5 @@ NK_PUBLIC void nk_each_atan_f16_rvv(nk_f16_t const *ins, nk_size_t n, nk_f16_t *
696
696
  #endif
697
697
 
698
698
  #endif // NK_TARGET_RVV
699
- #endif // NK_TARGET_RISCV_
699
+ #endif // NK_TARGET_RISCV64_
700
700
  #endif // NK_TRIGONOMETRY_RVV_H
@@ -23,7 +23,7 @@
23
23
  #ifndef NK_TRIGONOMETRY_SKYLAKE_H
24
24
  #define NK_TRIGONOMETRY_SKYLAKE_H
25
25
 
26
- #if NK_TARGET_X86_
26
+ #if NK_TARGET_X8664_
27
27
  #if NK_TARGET_SKYLAKE
28
28
 
29
29
  #include "numkong/types.h"
@@ -721,5 +721,5 @@ NK_PUBLIC void nk_each_atan_f16_skylake(nk_f16_t const *ins, nk_size_t n, nk_f16
721
721
  #endif
722
722
 
723
723
  #endif // NK_TARGET_SKYLAKE
724
- #endif // NK_TARGET_X86_
724
+ #endif // NK_TARGET_X8664_
725
725
  #endif // NK_TRIGONOMETRY_SKYLAKE_H
@@ -7,7 +7,7 @@
7
7
  * Defines:
8
8
  *
9
9
  * - Sized aliases for numeric types, like: `nk_i32_t` and `nk_f64_t`.
10
- * - Macros for internal compiler/hardware checks, like: `NK_TARGET_ARM_`.
10
+ * - Macros for internal compiler/hardware checks, like: `NK_TARGET_ARM64_`.
11
11
  * - Macros for feature controls, like: `NK_TARGET_NEON`
12
12
  *
13
13
  * @section fp8_types FP8 Numeric Types
@@ -126,52 +126,52 @@
126
126
  #define NK_ALLOW_ISA_REDIRECT 1
127
127
  #endif
128
128
 
129
- // Compiling for Arm: NK_TARGET_ARM_
129
+ // Compiling for 64-bit Arm: NK_TARGET_ARM64_
130
130
  // https://arm-software.github.io/acle/main/acle.html
131
- #if !defined(NK_TARGET_ARM_)
131
+ #if !defined(NK_TARGET_ARM64_)
132
132
  #if defined(__aarch64__) || defined(_M_ARM64)
133
- #define NK_TARGET_ARM_ 1
133
+ #define NK_TARGET_ARM64_ 1
134
134
  #else
135
- #define NK_TARGET_ARM_ 0
135
+ #define NK_TARGET_ARM64_ 0
136
136
  #endif // defined(__aarch64__) || defined(_M_ARM64)
137
- #endif // !defined(NK_TARGET_ARM_)
137
+ #endif // !defined(NK_TARGET_ARM64_)
138
138
 
139
- // Compiling for x86: NK_TARGET_X86_
139
+ // Compiling for 64-bit x86 (x86-64): NK_TARGET_X8664_
140
140
  // https://www.intel.com/content/www/us/en/docs/dpcpp-cpp-compiler/developer-guide-reference/2024-2/additional-predefined-macros.html
141
- #if !defined(NK_TARGET_X86_)
141
+ #if !defined(NK_TARGET_X8664_)
142
142
  #if defined(__x86_64__) || defined(_M_X64)
143
- #define NK_TARGET_X86_ 1
143
+ #define NK_TARGET_X8664_ 1
144
144
  #else
145
- #define NK_TARGET_X86_ 0
145
+ #define NK_TARGET_X8664_ 0
146
146
  #endif // defined(__x86_64__) || defined(_M_X64)
147
- #endif // !defined(NK_TARGET_X86_)
147
+ #endif // !defined(NK_TARGET_X8664_)
148
148
 
149
- // Compiling for RISC-V: NK_TARGET_RISCV_
150
- #if !defined(NK_TARGET_RISCV_)
149
+ // Compiling for 64-bit RISC-V: NK_TARGET_RISCV64_
150
+ #if !defined(NK_TARGET_RISCV64_)
151
151
  #if defined(__riscv) && (__riscv_xlen == 64)
152
- #define NK_TARGET_RISCV_ 1
152
+ #define NK_TARGET_RISCV64_ 1
153
153
  #else
154
- #define NK_TARGET_RISCV_ 0
154
+ #define NK_TARGET_RISCV64_ 0
155
155
  #endif // defined(__riscv) && (__riscv_xlen == 64)
156
- #endif // !defined(NK_TARGET_RISCV_)
156
+ #endif // !defined(NK_TARGET_RISCV64_)
157
157
 
158
- // Compiling for LoongArch: NK_TARGET_LOONGARCH_
159
- #if !defined(NK_TARGET_LOONGARCH_)
158
+ // Compiling for LoongArch: NK_TARGET_LOONGARCH64_
159
+ #if !defined(NK_TARGET_LOONGARCH64_)
160
160
  #if defined(__loongarch__)
161
- #define NK_TARGET_LOONGARCH_ 1
161
+ #define NK_TARGET_LOONGARCH64_ 1
162
162
  #else
163
- #define NK_TARGET_LOONGARCH_ 0
163
+ #define NK_TARGET_LOONGARCH64_ 0
164
164
  #endif // defined(__loongarch__)
165
- #endif // !defined(NK_TARGET_LOONGARCH_)
165
+ #endif // !defined(NK_TARGET_LOONGARCH64_)
166
166
 
167
- // Compiling for Power: NK_TARGET_POWER_
168
- #if !defined(NK_TARGET_POWER_)
167
+ // Compiling for 64-bit Power: NK_TARGET_POWER64_
168
+ #if !defined(NK_TARGET_POWER64_)
169
169
  #if defined(__powerpc64__) || defined(__ppc64__) || defined(_ARCH_PPC64)
170
- #define NK_TARGET_POWER_ 1
170
+ #define NK_TARGET_POWER64_ 1
171
171
  #else
172
- #define NK_TARGET_POWER_ 0
172
+ #define NK_TARGET_POWER64_ 0
173
173
  #endif // defined(__powerpc64__) || defined(__ppc64__) || defined(_ARCH_PPC64)
174
- #endif // !defined(NK_TARGET_POWER_)
174
+ #endif // !defined(NK_TARGET_POWER64_)
175
175
 
176
176
  // Compiling for WASM: NK_TARGET_WASM_
177
177
  #if !defined(NK_TARGET_WASM_)
@@ -203,7 +203,7 @@
203
203
  #endif // !defined(NK_TARGET_V128RELAXED) || ...
204
204
 
205
205
  // Compiling for RISC-V Vector: NK_TARGET_RVV
206
- #if !defined(NK_TARGET_RVV) || (NK_TARGET_RVV && !NK_TARGET_RISCV_)
206
+ #if !defined(NK_TARGET_RVV) || (NK_TARGET_RVV && !NK_TARGET_RISCV64_)
207
207
  #if defined(__riscv_v) && (__riscv_v >= 1000000)
208
208
  #define NK_TARGET_RVV 1
209
209
  #else
@@ -248,7 +248,7 @@
248
248
  // Compiling for LoongArch LASX (256-bit SIMD): NK_TARGET_LOONGSONASX
249
249
  // LASX provides 32 × 256-bit vector registers, widening integer multiply-accumulate,
250
250
  // and f32-to-f64 conversion (xvfcvtl_d_s / xvfcvth_d_s) but no widening FMA.
251
- #if !defined(NK_TARGET_LOONGSONASX) || (NK_TARGET_LOONGSONASX && !NK_TARGET_LOONGARCH_)
251
+ #if !defined(NK_TARGET_LOONGSONASX) || (NK_TARGET_LOONGSONASX && !NK_TARGET_LOONGARCH64_)
252
252
  #if defined(__loongarch_asx)
253
253
  #define NK_TARGET_LOONGSONASX 1
254
254
  #else
@@ -261,7 +261,7 @@
261
261
  // VSX provides 64 × 128-bit registers, FMA (vec_madd), vec_msum (multiply-sum), hardware f16
262
262
  // conversion (vec_extract_fp32_from_shorth/l), length-limited loads (vec_xl_len), per-byte
263
263
  // popcount (vec_popcnt), and vec_cmpne. Requires POWER9 (ISA 3.0) or newer.
264
- #if !defined(NK_TARGET_POWERVSX) || (NK_TARGET_POWERVSX && !NK_TARGET_POWER_)
264
+ #if !defined(NK_TARGET_POWERVSX) || (NK_TARGET_POWERVSX && !NK_TARGET_POWER64_)
265
265
  #if defined(__VSX__) && defined(__POWER9_VECTOR__)
266
266
  #define NK_TARGET_POWERVSX 1
267
267
  #else
@@ -270,19 +270,20 @@
270
270
  #endif // defined(__VSX__) && defined(__POWER9_VECTOR__)
271
271
  #endif // !defined(NK_TARGET_POWERVSX) || ...
272
272
 
273
- // Compiling for Arm: NK_TARGET_NEON
274
- #if !defined(NK_TARGET_NEON) || (NK_TARGET_NEON && !NK_TARGET_ARM_)
275
- #if defined(__ARM_NEON) || (defined(_MSC_VER) && defined(_M_ARM64))
273
+ // Compiling for Arm: NK_TARGET_NEON (AArch64 only, AArch32 NEON is not supported)
274
+ #if !defined(NK_TARGET_NEON) || (NK_TARGET_NEON && !NK_TARGET_ARM64_)
275
+ #if (defined(__ARM_NEON) && defined(__aarch64__)) || (defined(_MSC_VER) && defined(_M_ARM64))
276
276
  #define NK_TARGET_NEON 1
277
277
  #else
278
278
  #undef NK_TARGET_NEON
279
279
  #define NK_TARGET_NEON 0
280
- #endif // defined(__ARM_NEON) || ...
280
+ #endif // (defined(__ARM_NEON) && defined(__aarch64__)) || ...
281
281
  #endif // !defined(NK_TARGET_NEON) || ...
282
282
 
283
- // Compiling for Arm: NK_TARGET_NEONSDOT (FEAT_DotProd, optional from ARMv8.1, mandatory at ARMv8.4 with AdvSIMD)
284
- #if !defined(NK_TARGET_NEONSDOT) || (NK_TARGET_NEONSDOT && !NK_TARGET_ARM_)
285
- #if defined(__ARM_FEATURE_DOTPROD) || (defined(_MSC_VER) && defined(_M_ARM64) && __ARM_ARCH >= 804)
283
+ // Compiling for Arm: NK_TARGET_NEONSDOT (FEAT_DotProd, AArch64 only)
284
+ #if !defined(NK_TARGET_NEONSDOT) || (NK_TARGET_NEONSDOT && !NK_TARGET_ARM64_)
285
+ #if (defined(__ARM_FEATURE_DOTPROD) && defined(__aarch64__)) || \
286
+ (defined(_MSC_VER) && defined(_M_ARM64) && __ARM_ARCH >= 804)
286
287
  #define NK_TARGET_NEONSDOT 1
287
288
  #else
288
289
  #undef NK_TARGET_NEONSDOT
@@ -290,9 +291,10 @@
290
291
  #endif
291
292
  #endif // !defined(NK_TARGET_NEONSDOT) || ...
292
293
 
293
- // Compiling for Arm: NK_TARGET_NEONHALF (FEAT_FP16, optional from ARMv8.2, mandatory at ARMv9.0 with AdvSIMD)
294
- #if !defined(NK_TARGET_NEONHALF) || (NK_TARGET_NEONHALF && !NK_TARGET_ARM_)
295
- #if defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) || (defined(_MSC_VER) && defined(_M_ARM64) && __ARM_ARCH >= 802)
294
+ // Compiling for Arm: NK_TARGET_NEONHALF (FEAT_FP16, AArch64 only)
295
+ #if !defined(NK_TARGET_NEONHALF) || (NK_TARGET_NEONHALF && !NK_TARGET_ARM64_)
296
+ #if (defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) && defined(__aarch64__)) || \
297
+ (defined(_MSC_VER) && defined(_M_ARM64) && __ARM_ARCH >= 802)
296
298
  #define NK_TARGET_NEONHALF 1
297
299
  #else
298
300
  #undef NK_TARGET_NEONHALF
@@ -300,9 +302,10 @@
300
302
  #endif
301
303
  #endif // !defined(NK_TARGET_NEONHALF) || ...
302
304
 
303
- // Compiling for Arm: NK_TARGET_NEONFHM (FEAT_FHM, optional from ARMv8.1, mandatory at ARMv8.4 with FP16)
304
- #if !defined(NK_TARGET_NEONFHM) || (NK_TARGET_NEONFHM && !NK_TARGET_ARM_)
305
- #if defined(__ARM_FEATURE_FP16_FML) || (defined(_MSC_VER) && defined(_M_ARM64) && __ARM_ARCH >= 804)
305
+ // Compiling for Arm: NK_TARGET_NEONFHM (FEAT_FHM, AArch64 only)
306
+ #if !defined(NK_TARGET_NEONFHM) || (NK_TARGET_NEONFHM && !NK_TARGET_ARM64_)
307
+ #if (defined(__ARM_FEATURE_FP16_FML) && defined(__aarch64__)) || \
308
+ (defined(_MSC_VER) && defined(_M_ARM64) && __ARM_ARCH >= 804)
306
309
  #define NK_TARGET_NEONFHM 1
307
310
  #else
308
311
  #undef NK_TARGET_NEONFHM
@@ -310,9 +313,10 @@
310
313
  #endif
311
314
  #endif // !defined(NK_TARGET_NEONFHM) || ...
312
315
 
313
- // Compiling for Arm: NK_TARGET_NEONBFDOT (FEAT_BF16, optional from ARMv8.2, mandatory at ARMv8.6 with FP)
314
- #if !defined(NK_TARGET_NEONBFDOT) || (NK_TARGET_NEONBFDOT && !NK_TARGET_ARM_)
315
- #if defined(__ARM_FEATURE_BF16_VECTOR_ARITHMETIC) || (defined(_MSC_VER) && defined(_M_ARM64) && __ARM_ARCH >= 806)
316
+ // Compiling for Arm: NK_TARGET_NEONBFDOT (FEAT_BF16, AArch64 only)
317
+ #if !defined(NK_TARGET_NEONBFDOT) || (NK_TARGET_NEONBFDOT && !NK_TARGET_ARM64_)
318
+ #if (defined(__ARM_FEATURE_BF16_VECTOR_ARITHMETIC) && defined(__aarch64__)) || \
319
+ (defined(_MSC_VER) && defined(_M_ARM64) && __ARM_ARCH >= 806)
316
320
  #define NK_TARGET_NEONBFDOT 1
317
321
  #else
318
322
  #undef NK_TARGET_NEONBFDOT
@@ -323,8 +327,8 @@
323
327
  // Compiling for Arm: NK_TARGET_NEONFP8 (NEON FP8 extensions, FEAT_FP8DOT4)
324
328
  // ACLE macro __ARM_FEATURE_FP8DOT4 defined by GCC 15+ and Clang 21+ when +fp8dot4 is enabled.
325
329
  // Older compilers lack mfloat8x16_t and the fp8dot4 target attribute entirely.
326
- #if !defined(NK_TARGET_NEONFP8) || (NK_TARGET_NEONFP8 && !NK_TARGET_ARM_)
327
- #if defined(__ARM_FEATURE_FP8DOT4)
330
+ #if !defined(NK_TARGET_NEONFP8) || (NK_TARGET_NEONFP8 && !NK_TARGET_ARM64_)
331
+ #if defined(__ARM_FEATURE_FP8DOT4) && defined(__aarch64__)
328
332
  #define NK_TARGET_NEONFP8 1
329
333
  #else
330
334
  #undef NK_TARGET_NEONFP8
@@ -333,7 +337,7 @@
333
337
  #endif // !defined(NK_TARGET_NEONFP8) || ...
334
338
 
335
339
  // Compiling for Arm: NK_TARGET_SVE
336
- #if !defined(NK_TARGET_SVE) || (NK_TARGET_SVE && !NK_TARGET_ARM_)
340
+ #if !defined(NK_TARGET_SVE) || (NK_TARGET_SVE && !NK_TARGET_ARM64_)
337
341
  #if defined(__ARM_FEATURE_SVE)
338
342
  #define NK_TARGET_SVE 1
339
343
  #else
@@ -343,7 +347,7 @@
343
347
  #endif // !defined(NK_TARGET_SVE) || ...
344
348
 
345
349
  // Compiling for Arm: NK_TARGET_SVESDOT
346
- #if !defined(NK_TARGET_SVESDOT) || (NK_TARGET_SVESDOT && !NK_TARGET_ARM_)
350
+ #if !defined(NK_TARGET_SVESDOT) || (NK_TARGET_SVESDOT && !NK_TARGET_ARM64_)
347
351
  #if defined(__ARM_FEATURE_SVE)
348
352
  #define NK_TARGET_SVESDOT 1
349
353
  #else
@@ -353,7 +357,7 @@
353
357
  #endif // !defined(NK_TARGET_SVESDOT) || ...
354
358
 
355
359
  // Compiling for Arm: NK_TARGET_SVEHALF
356
- #if !defined(NK_TARGET_SVEHALF) || (NK_TARGET_SVEHALF && !NK_TARGET_ARM_)
360
+ #if !defined(NK_TARGET_SVEHALF) || (NK_TARGET_SVEHALF && !NK_TARGET_ARM64_)
357
361
  #if defined(__ARM_FEATURE_SVE)
358
362
  #define NK_TARGET_SVEHALF 1
359
363
  #else
@@ -363,7 +367,7 @@
363
367
  #endif // !defined(NK_TARGET_SVEHALF) || ...
364
368
 
365
369
  // Compiling for Arm: NK_TARGET_SVEBFDOT
366
- #if !defined(NK_TARGET_SVEBFDOT) || (NK_TARGET_SVEBFDOT && !NK_TARGET_ARM_)
370
+ #if !defined(NK_TARGET_SVEBFDOT) || (NK_TARGET_SVEBFDOT && !NK_TARGET_ARM64_)
367
371
  #if defined(__ARM_FEATURE_SVE)
368
372
  #define NK_TARGET_SVEBFDOT 1
369
373
  #else
@@ -373,7 +377,7 @@
373
377
  #endif // !defined(NK_TARGET_SVEBFDOT) || ...
374
378
 
375
379
  // Compiling for Arm: NK_TARGET_SVE2
376
- #if !defined(NK_TARGET_SVE2) || (NK_TARGET_SVE2 && !NK_TARGET_ARM_)
380
+ #if !defined(NK_TARGET_SVE2) || (NK_TARGET_SVE2 && !NK_TARGET_ARM64_)
377
381
  #if defined(__ARM_FEATURE_SVE2)
378
382
  #define NK_TARGET_SVE2 1
379
383
  #else
@@ -383,13 +387,13 @@
383
387
  #endif // !defined(NK_TARGET_SVE2) || ...
384
388
 
385
389
  // Compiling for Arm: NK_TARGET_SVE2P1
386
- #if !defined(NK_TARGET_SVE2P1) || (NK_TARGET_SVE2P1 && !NK_TARGET_ARM_)
390
+ #if !defined(NK_TARGET_SVE2P1) || (NK_TARGET_SVE2P1 && !NK_TARGET_ARM64_)
387
391
  #undef NK_TARGET_SVE2P1
388
392
  #define NK_TARGET_SVE2P1 0
389
393
  #endif // !defined(NK_TARGET_SVE2P1) || ...
390
394
 
391
395
  // Compiling for Arm: NK_TARGET_SME (Scalable Matrix Extension)
392
- #if !defined(NK_TARGET_SME) || (NK_TARGET_SME && !NK_TARGET_ARM_)
396
+ #if !defined(NK_TARGET_SME) || (NK_TARGET_SME && !NK_TARGET_ARM64_)
393
397
  #if defined(__ARM_FEATURE_SME)
394
398
  #define NK_TARGET_SME 1
395
399
  #else
@@ -398,7 +402,7 @@
398
402
  #endif // defined(__ARM_FEATURE_SME)
399
403
  #endif // !defined(NK_TARGET_SME) || ...
400
404
 
401
- #if !defined(NK_TARGET_SME2) || (NK_TARGET_SME2 && !NK_TARGET_ARM_)
405
+ #if !defined(NK_TARGET_SME2) || (NK_TARGET_SME2 && !NK_TARGET_ARM64_)
402
406
  #if defined(__ARM_FEATURE_SME2)
403
407
  #define NK_TARGET_SME2 1
404
408
  #else
@@ -409,7 +413,7 @@
409
413
 
410
414
  // Compiling for Arm: NK_TARGET_SME2P1 (FEAT_SME2p1)
411
415
  // ACLE macro: __ARM_FEATURE_SME2p1 (note lowercase 'p')
412
- #if !defined(NK_TARGET_SME2P1) || (NK_TARGET_SME2P1 && !NK_TARGET_ARM_)
416
+ #if !defined(NK_TARGET_SME2P1) || (NK_TARGET_SME2P1 && !NK_TARGET_ARM64_)
413
417
  #if defined(__ARM_FEATURE_SME2p1)
414
418
  #define NK_TARGET_SME2P1 1
415
419
  #else
@@ -420,7 +424,7 @@
420
424
 
421
425
  // AppleClang 17 exposes SME sub-features through `arm_sme.h` builtin aliases,
422
426
  // not dedicated `__ARM_FEATURE_*` predefines for every matrix subtype.
423
- #if !defined(NK_TARGET_SMEF64) || (NK_TARGET_SMEF64 && !NK_TARGET_ARM_)
427
+ #if !defined(NK_TARGET_SMEF64) || (NK_TARGET_SMEF64 && !NK_TARGET_ARM64_)
424
428
  #if defined(__ARM_FEATURE_SME_F64F64) || (defined(__has_builtin) && __has_builtin(__builtin_sme_svmopa_za64_f64_m))
425
429
  #define NK_TARGET_SMEF64 1
426
430
  #else
@@ -429,7 +433,7 @@
429
433
  #endif // defined(__ARM_FEATURE_SME_F64F64) || ...
430
434
  #endif // !defined(NK_TARGET_SMEF64) || ...
431
435
 
432
- #if !defined(NK_TARGET_SMEBI32) || (NK_TARGET_SMEBI32 && !NK_TARGET_ARM_)
436
+ #if !defined(NK_TARGET_SMEBI32) || (NK_TARGET_SMEBI32 && !NK_TARGET_ARM64_)
433
437
  #if defined(__has_builtin) && __has_builtin(__builtin_sme_svbmopa_za32_u32_m)
434
438
  #define NK_TARGET_SMEBI32 1
435
439
  #else
@@ -438,7 +442,7 @@
438
442
  #endif // defined(__has_builtin) && __has_builtin(__builtin_sme_svbmopa_za32_u32_m)
439
443
  #endif // !defined(NK_TARGET_SMEBI32) || ...
440
444
 
441
- #if !defined(NK_TARGET_SMEHALF) || (NK_TARGET_SMEHALF && !NK_TARGET_ARM_)
445
+ #if !defined(NK_TARGET_SMEHALF) || (NK_TARGET_SMEHALF && !NK_TARGET_ARM64_)
442
446
  #if defined(__ARM_FEATURE_SME_F16F16) || (defined(__has_builtin) && __has_builtin(__builtin_sme_svmopa_za32_f16_m))
443
447
  #define NK_TARGET_SMEHALF 1
444
448
  #else
@@ -447,7 +451,7 @@
447
451
  #endif // defined(__ARM_FEATURE_SME_F16F16) || ...
448
452
  #endif // !defined(NK_TARGET_SMEHALF) || ...
449
453
 
450
- #if !defined(NK_TARGET_SMEBF16) || (NK_TARGET_SMEBF16 && !NK_TARGET_ARM_)
454
+ #if !defined(NK_TARGET_SMEBF16) || (NK_TARGET_SMEBF16 && !NK_TARGET_ARM64_)
451
455
  #if defined(__has_builtin) && __has_builtin(__builtin_sme_svmopa_za32_bf16_m)
452
456
  #define NK_TARGET_SMEBF16 1
453
457
  #else
@@ -456,7 +460,7 @@
456
460
  #endif // defined(__has_builtin) && __has_builtin(__builtin_sme_svmopa_za32_bf16_m)
457
461
  #endif // !defined(NK_TARGET_SMEBF16) || ...
458
462
 
459
- #if !defined(NK_TARGET_SMELUT2) || (NK_TARGET_SMELUT2 && !NK_TARGET_ARM_)
463
+ #if !defined(NK_TARGET_SMELUT2) || (NK_TARGET_SMELUT2 && !NK_TARGET_ARM64_)
460
464
  #if defined(__has_builtin) && __has_builtin(__builtin_sme_svluti2_lane_zt_u8)
461
465
  #define NK_TARGET_SMELUT2 1
462
466
  #else
@@ -466,7 +470,7 @@
466
470
  #endif // !defined(NK_TARGET_SMELUT2) || ...
467
471
 
468
472
  // Compiling for Arm: NK_TARGET_SMEFA64 (FEAT_SME_FA64, full SVE2 in streaming mode)
469
- #if !defined(NK_TARGET_SMEFA64) || (NK_TARGET_SMEFA64 && !NK_TARGET_ARM_)
473
+ #if !defined(NK_TARGET_SMEFA64) || (NK_TARGET_SMEFA64 && !NK_TARGET_ARM64_)
470
474
  #if defined(__ARM_FEATURE_SME_FA64)
471
475
  #define NK_TARGET_SMEFA64 1
472
476
  #else
@@ -491,7 +495,7 @@
491
495
  // - _MSC_VER >= 1900 (VS 2015+): AVX2/FMA/F16C (Haswell)
492
496
  // - _MSC_VER >= 1920 (VS 2019+): AVX-512 base (Skylake, Icelake), AVX-VNNI (Alder)
493
497
  // - _MSC_VER >= 1944 (VS 2022 17.14+): BF16, FP16, VP2INTERSECT, VNNI-INT8 (Sierra), AMX
494
- #if !defined(NK_TARGET_HASWELL) || (NK_TARGET_HASWELL && !NK_TARGET_X86_)
498
+ #if !defined(NK_TARGET_HASWELL) || (NK_TARGET_HASWELL && !NK_TARGET_X8664_)
495
499
  #if (defined(__AVX2__) && defined(__FMA__) && defined(__F16C__)) || (defined(_MSC_VER) && _MSC_VER >= 1900)
496
500
  #define NK_TARGET_HASWELL 1
497
501
  #else
@@ -507,7 +511,7 @@
507
511
  // gcc-12 -march=sapphirerapids -dM -E - < /dev/null | egrep "SSE|AVX" | sort
508
512
  // On Arm machines you may want to check for other flags:
509
513
  // gcc-12 -march=native -dM -E - < /dev/null | egrep "NEON|SVE|FP16|FMA" | sort
510
- #if !defined(NK_TARGET_SKYLAKE) || (NK_TARGET_SKYLAKE && !NK_TARGET_X86_)
514
+ #if !defined(NK_TARGET_SKYLAKE) || (NK_TARGET_SKYLAKE && !NK_TARGET_X8664_)
511
515
  #if (defined(__AVX512F__) && defined(__AVX512CD__) && defined(__AVX512VL__) && defined(__AVX512DQ__) && \
512
516
  defined(__AVX512BW__)) || \
513
517
  (defined(_MSC_VER) && _MSC_VER >= 1920)
@@ -518,7 +522,7 @@
518
522
  #endif
519
523
  #endif // !defined(NK_TARGET_SKYLAKE) || ...
520
524
 
521
- #if !defined(NK_TARGET_ICELAKE) || (NK_TARGET_ICELAKE && !NK_TARGET_X86_)
525
+ #if !defined(NK_TARGET_ICELAKE) || (NK_TARGET_ICELAKE && !NK_TARGET_X8664_)
522
526
  #if (defined(__AVX512VNNI__) && defined(__AVX512IFMA__) && defined(__AVX512BITALG__) && defined(__AVX512VBMI__) && \
523
527
  defined(__AVX512VBMI2__) && defined(__AVX512VPOPCNTDQ__)) || \
524
528
  (defined(_MSC_VER) && _MSC_VER >= 1920)
@@ -529,7 +533,7 @@
529
533
  #endif
530
534
  #endif // !defined(NK_TARGET_ICELAKE) || ...
531
535
 
532
- #if !defined(NK_TARGET_GENOA) || (NK_TARGET_GENOA && !NK_TARGET_X86_)
536
+ #if !defined(NK_TARGET_GENOA) || (NK_TARGET_GENOA && !NK_TARGET_X8664_)
533
537
  #if defined(__AVX512BF16__) || (defined(_MSC_VER) && _MSC_VER >= 1944)
534
538
  #define NK_TARGET_GENOA 1
535
539
  #else
@@ -542,7 +546,7 @@
542
546
  // GCC 14+: defines __AVX10_2__ with -mavx10.2-512
543
547
  // Clang 19+: defines __AVX10_2__ with -mavx10.2-512
544
548
  // MSVC: defines __AVX10_VER__ >= 2 with /arch:AVX10.2 (VS 2026+, not yet released)
545
- #if !defined(NK_TARGET_DIAMOND) || (NK_TARGET_DIAMOND && !NK_TARGET_X86_)
549
+ #if !defined(NK_TARGET_DIAMOND) || (NK_TARGET_DIAMOND && !NK_TARGET_X8664_)
546
550
  #if defined(__AVX10_2__) || (defined(__AVX10_VER__) && __AVX10_VER__ >= 2)
547
551
  #define NK_TARGET_DIAMOND 1
548
552
  #else
@@ -551,7 +555,7 @@
551
555
  #endif // defined(__AVX10_2__) || ...
552
556
  #endif // !defined(NK_TARGET_DIAMOND) || ...
553
557
 
554
- #if !defined(NK_TARGET_SAPPHIRE) || (NK_TARGET_SAPPHIRE && !NK_TARGET_X86_)
558
+ #if !defined(NK_TARGET_SAPPHIRE) || (NK_TARGET_SAPPHIRE && !NK_TARGET_X8664_)
555
559
  #if defined(__AVX512FP16__) || (defined(_MSC_VER) && _MSC_VER >= 1944)
556
560
  #define NK_TARGET_SAPPHIRE 1
557
561
  #else
@@ -560,7 +564,7 @@
560
564
  #endif
561
565
  #endif // !defined(NK_TARGET_SAPPHIRE) || ...
562
566
 
563
- #if !defined(NK_TARGET_SAPPHIREAMX) || (NK_TARGET_SAPPHIREAMX && !NK_TARGET_X86_)
567
+ #if !defined(NK_TARGET_SAPPHIREAMX) || (NK_TARGET_SAPPHIREAMX && !NK_TARGET_X8664_)
564
568
  #if (defined(__AMX_TILE__) && defined(__AMX_BF16__) && defined(__AMX_INT8__)) || (defined(_MSC_VER) && _MSC_VER >= 1944)
565
569
  #define NK_TARGET_SAPPHIREAMX 1
566
570
  #else
@@ -569,7 +573,7 @@
569
573
  #endif
570
574
  #endif // !defined(NK_TARGET_SAPPHIREAMX) || ...
571
575
 
572
- #if !defined(NK_TARGET_GRANITEAMX) || (NK_TARGET_GRANITEAMX && !NK_TARGET_X86_)
576
+ #if !defined(NK_TARGET_GRANITEAMX) || (NK_TARGET_GRANITEAMX && !NK_TARGET_X8664_)
573
577
  #if (defined(__AMX_TILE__) && defined(__AMX_FP16__)) || (defined(_MSC_VER) && _MSC_VER >= 1944)
574
578
  #define NK_TARGET_GRANITEAMX 1
575
579
  #else
@@ -578,7 +582,7 @@
578
582
  #endif
579
583
  #endif // !defined(NK_TARGET_GRANITEAMX) || ...
580
584
 
581
- #if !defined(NK_TARGET_TURIN) || (NK_TARGET_TURIN && !NK_TARGET_X86_)
585
+ #if !defined(NK_TARGET_TURIN) || (NK_TARGET_TURIN && !NK_TARGET_X8664_)
582
586
  #if defined(__AVX512VP2INTERSECT__) || (defined(_MSC_VER) && _MSC_VER >= 1944)
583
587
  #define NK_TARGET_TURIN 1
584
588
  #else
@@ -587,7 +591,7 @@
587
591
  #endif
588
592
  #endif // !defined(NK_TARGET_TURIN) || ...
589
593
 
590
- #if !defined(NK_TARGET_ALDER) || (NK_TARGET_ALDER && !NK_TARGET_X86_)
594
+ #if !defined(NK_TARGET_ALDER) || (NK_TARGET_ALDER && !NK_TARGET_X8664_)
591
595
  #if defined(__AVXVNNI__) || (defined(_MSC_VER) && _MSC_VER >= 1920)
592
596
  #define NK_TARGET_ALDER 1
593
597
  #else
@@ -596,7 +600,7 @@
596
600
  #endif
597
601
  #endif // !defined(NK_TARGET_ALDER) || ...
598
602
 
599
- #if !defined(NK_TARGET_SIERRA) || (NK_TARGET_SIERRA && !NK_TARGET_X86_)
603
+ #if !defined(NK_TARGET_SIERRA) || (NK_TARGET_SIERRA && !NK_TARGET_X8664_)
600
604
  #if defined(__AVXVNNIINT8__) || (defined(_MSC_VER) && _MSC_VER >= 1944)
601
605
  #define NK_TARGET_SIERRA 1
602
606
  #else
@@ -671,7 +675,7 @@
671
675
  * NK_STREAMING_ marks functions that require streaming SVE mode (e.g. FCVTLT).
672
676
  * NK_STREAMING_COMPATIBLE_ marks helpers callable from both streaming and non-streaming mode.
673
677
  */
674
- #if NK_TARGET_ARM_ && NK_TARGET_SME
678
+ #if NK_TARGET_ARM64_ && NK_TARGET_SME
675
679
  #define NK_STREAMING_ __arm_streaming
676
680
  #define NK_STREAMING_COMPATIBLE_ __arm_streaming_compatible
677
681
  #else
@@ -684,7 +688,7 @@
684
688
  * MSVC typedefs `__m512bh`, `__m512h`, `__m256bh` as aliases for `__m512i`/`__m256i`,
685
689
  * but rejects C-style casts between them. GCC/Clang define them as distinct types.
686
690
  */
687
- #if NK_TARGET_X86_
691
+ #if NK_TARGET_X8664_
688
692
  #if defined(_MSC_VER)
689
693
  #define nk_m512bh_from_m512i_(x) (x)
690
694
  #define nk_m512h_from_m512i_(x) (x)
@@ -804,7 +808,7 @@ typedef unsigned int nk_u32_t;
804
808
  /* On LP64 targets (Linux ARM64, RISC-V 64), `long` and `long long` are both 64-bit but distinct types.
805
809
  * NEON/RVV intrinsics on Linux expect `long*`, while Apple's NEON intrinsics expect `long long*`.
806
810
  * Windows uses LLP64 where `long` is 32-bit, so it must use `long long` for 64-bit types. */
807
- #if ((NK_TARGET_ARM_ && !defined(NK_DEFINED_APPLE_)) || NK_TARGET_RISCV_) && !defined(NK_DEFINED_WINDOWS_)
811
+ #if ((NK_TARGET_ARM64_ && !defined(NK_DEFINED_APPLE_)) || NK_TARGET_RISCV64_) && !defined(NK_DEFINED_WINDOWS_)
808
812
  /** @brief Signed 64-bit integer. Range: [−2⁶³, +2⁶³−1]. */
809
813
  typedef signed long nk_i64_t;
810
814
  /** @brief Unsigned 64-bit integer. Range: [0, 2⁶⁴−1]. */
@@ -821,7 +825,7 @@ typedef float nk_f32_t;
821
825
  /** @brief Double-precision (64-bit) IEEE 754 float. sign(1) + exponent(11) + mantissa(52), bias=1023. */
822
826
  typedef double nk_f64_t;
823
827
 
824
- #if NK_TARGET_X86_ || NK_TARGET_ARM_ || NK_TARGET_RISCV_ || NK_TARGET_POWER_ || NK_TARGET_LOONGARCH_
828
+ #if NK_TARGET_X8664_ || NK_TARGET_ARM64_ || NK_TARGET_RISCV64_ || NK_TARGET_POWER64_ || NK_TARGET_LOONGARCH64_
825
829
  #define NK_IS_64BIT_ 1
826
830
  #else
827
831
  #define NK_IS_64BIT_ 0
@@ -1088,7 +1092,7 @@ typedef unsigned short nk_bf16_t;
1088
1092
  * Some of those are defined as aliases, so we use `#define` preprocessor
1089
1093
  * directives instead of `typedef` to avoid errors.
1090
1094
  */
1091
- #if NK_TARGET_ARM_
1095
+ #if NK_TARGET_ARM64_
1092
1096
  #if defined(_MSC_VER)
1093
1097
  #define nk_f16_for_arm_simd_t nk_f16_t
1094
1098
  #define nk_bf16_for_arm_simd_t nk_bf16_t
@@ -1102,7 +1106,7 @@ typedef unsigned short nk_bf16_t;
1102
1106
  * RISC-V Vector (RVV) intrinsics use `_Float16` for half-precision floats.
1103
1107
  * This is the standard C23 type, also available in GCC/Clang with RVV extensions.
1104
1108
  */
1105
- #if NK_TARGET_RISCV_
1109
+ #if NK_TARGET_RISCV64_
1106
1110
  #define nk_f16_for_rvv_intrinsics_t _Float16
1107
1111
  #endif
1108
1112
 
@@ -1237,6 +1241,8 @@ typedef union NK_MAY_ALIAS_ nk_b128_vec_t {
1237
1241
  int32x4_t i32x4;
1238
1242
  int64x2_t i64x2;
1239
1243
  float32x4_t f32x4;
1244
+ #endif
1245
+ #if NK_TARGET_NEON && NK_TARGET_ARM64_ // double-precision NEON requires AArch64
1240
1246
  float64x2_t f64x2;
1241
1247
  #endif
1242
1248
  #if NK_TARGET_NEONHALF
@@ -1294,6 +1300,8 @@ typedef union NK_MAY_ALIAS_ nk_b256_vec_t {
1294
1300
  int32x4_t i32x4s[2];
1295
1301
  int64x2_t i64x2s[2];
1296
1302
  float32x4_t f32x4s[2];
1303
+ #endif
1304
+ #if NK_TARGET_NEON && NK_TARGET_ARM64_ // double-precision NEON requires AArch64
1297
1305
  float64x2_t f64x2s[2];
1298
1306
  #endif
1299
1307
  #if NK_TARGET_POWERVSX
@@ -1588,7 +1596,7 @@ NK_INTERNAL int nk_bf16_is_nan_(nk_bf16_t x) {
1588
1596
  * SMSTART SM / SMSTOP SM so the calling function's ABI is unchanged.
1589
1597
  * Inside `__arm_locally_streaming` functions the plain `svcntXX()` intrinsics are fine.
1590
1598
  */
1591
- #if NK_TARGET_ARM_ && NK_TARGET_SME
1599
+ #if NK_TARGET_ARM64_ && NK_TARGET_SME
1592
1600
  /** @brief Streaming SVL byte-element count (SVL/8) via SMSTART SM bracket. */
1593
1601
  NK_INTERNAL nk_size_t nk_sme_cntb_(void) {
1594
1602
  nk_u64_t r;
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "numkong",
3
- "version": "7.4.2",
3
+ "version": "7.4.3",
4
4
  "description": "Portable mixed-precision math, linear-algebra, & retrieval library with 2000+ SIMD kernels for x86, Arm, RISC-V, LoongArch, Power, & WebAssembly",
5
5
  "homepage": "https://github.com/ashvardanian/NumKong",
6
6
  "author": "Ash Vardanian",
@@ -98,11 +98,11 @@
98
98
  "printWidth": 120
99
99
  },
100
100
  "optionalDependencies": {
101
- "@numkong/darwin-arm64": "7.4.2",
102
- "@numkong/darwin-x64": "7.4.2",
103
- "@numkong/linux-arm64": "7.4.2",
104
- "@numkong/linux-x64": "7.4.2",
105
- "@numkong/win32-arm64": "7.4.2",
106
- "@numkong/win32-x64": "7.4.2"
101
+ "@numkong/darwin-arm64": "7.4.3",
102
+ "@numkong/darwin-x64": "7.4.3",
103
+ "@numkong/linux-arm64": "7.4.3",
104
+ "@numkong/linux-x64": "7.4.3",
105
+ "@numkong/win32-arm64": "7.4.3",
106
+ "@numkong/win32-x64": "7.4.3"
107
107
  }
108
108
  }