numkong 7.4.1 → 7.4.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (173)
  1. package/README.md +86 -130
  2. package/binding.gyp +16 -0
  3. package/c/numkong.c +1 -1
  4. package/include/numkong/attention/sapphireamx.h +2 -2
  5. package/include/numkong/attention/sme.h +2 -2
  6. package/include/numkong/capabilities.h +47 -47
  7. package/include/numkong/cast/diamond.h +2 -2
  8. package/include/numkong/cast/haswell.h +2 -2
  9. package/include/numkong/cast/icelake.h +2 -2
  10. package/include/numkong/cast/loongsonasx.h +2 -2
  11. package/include/numkong/cast/neon.h +2 -2
  12. package/include/numkong/cast/powervsx.h +2 -2
  13. package/include/numkong/cast/rvv.h +2 -2
  14. package/include/numkong/cast/sapphire.h +2 -2
  15. package/include/numkong/cast/skylake.h +2 -2
  16. package/include/numkong/curved/genoa.h +2 -2
  17. package/include/numkong/curved/haswell.h +2 -2
  18. package/include/numkong/curved/neon.h +2 -2
  19. package/include/numkong/curved/neonbfdot.h +2 -2
  20. package/include/numkong/curved/rvv.h +2 -2
  21. package/include/numkong/curved/skylake.h +2 -2
  22. package/include/numkong/curved/smef64.h +2 -2
  23. package/include/numkong/dot/alder.h +2 -2
  24. package/include/numkong/dot/diamond.h +2 -2
  25. package/include/numkong/dot/genoa.h +2 -2
  26. package/include/numkong/dot/haswell.h +2 -2
  27. package/include/numkong/dot/icelake.h +2 -2
  28. package/include/numkong/dot/loongsonasx.h +2 -2
  29. package/include/numkong/dot/neon.h +2 -2
  30. package/include/numkong/dot/neonbfdot.h +2 -2
  31. package/include/numkong/dot/neonfhm.h +2 -2
  32. package/include/numkong/dot/neonfp8.h +2 -2
  33. package/include/numkong/dot/neonsdot.h +2 -2
  34. package/include/numkong/dot/rvv.h +2 -2
  35. package/include/numkong/dot/rvvbb.h +2 -2
  36. package/include/numkong/dot/rvvbf16.h +2 -2
  37. package/include/numkong/dot/rvvhalf.h +2 -2
  38. package/include/numkong/dot/sapphire.h +2 -2
  39. package/include/numkong/dot/sierra.h +2 -2
  40. package/include/numkong/dot/skylake.h +2 -2
  41. package/include/numkong/dot/sve.h +2 -2
  42. package/include/numkong/dot/svebfdot.h +2 -2
  43. package/include/numkong/dot/svehalf.h +2 -2
  44. package/include/numkong/dot/svesdot.h +2 -2
  45. package/include/numkong/dots/alder.h +2 -2
  46. package/include/numkong/dots/diamond.h +2 -2
  47. package/include/numkong/dots/genoa.h +2 -2
  48. package/include/numkong/dots/haswell.h +2 -2
  49. package/include/numkong/dots/icelake.h +2 -2
  50. package/include/numkong/dots/loongsonasx.h +2 -2
  51. package/include/numkong/dots/neon.h +2 -2
  52. package/include/numkong/dots/neonbfdot.h +2 -2
  53. package/include/numkong/dots/neonfhm.h +2 -2
  54. package/include/numkong/dots/neonfp8.h +2 -2
  55. package/include/numkong/dots/neonsdot.h +2 -2
  56. package/include/numkong/dots/powervsx.h +2 -2
  57. package/include/numkong/dots/rvv.h +2 -2
  58. package/include/numkong/dots/sapphireamx.h +2 -2
  59. package/include/numkong/dots/sierra.h +2 -2
  60. package/include/numkong/dots/skylake.h +2 -2
  61. package/include/numkong/dots/sme.h +10 -10
  62. package/include/numkong/dots/smebi32.h +2 -2
  63. package/include/numkong/dots/smef64.h +2 -2
  64. package/include/numkong/dots/smehalf.h +2 -2
  65. package/include/numkong/each/haswell.h +2 -2
  66. package/include/numkong/each/icelake.h +2 -2
  67. package/include/numkong/each/neon.h +2 -2
  68. package/include/numkong/each/neonbfdot.h +2 -2
  69. package/include/numkong/each/neonhalf.h +2 -2
  70. package/include/numkong/each/rvv.h +2 -2
  71. package/include/numkong/each/sapphire.h +2 -2
  72. package/include/numkong/each/skylake.h +2 -2
  73. package/include/numkong/geospatial/haswell.h +2 -2
  74. package/include/numkong/geospatial/neon.h +2 -2
  75. package/include/numkong/geospatial/rvv.h +2 -2
  76. package/include/numkong/geospatial/skylake.h +2 -2
  77. package/include/numkong/maxsim/alder.h +2 -2
  78. package/include/numkong/maxsim/genoa.h +2 -2
  79. package/include/numkong/maxsim/haswell.h +2 -2
  80. package/include/numkong/maxsim/icelake.h +2 -2
  81. package/include/numkong/maxsim/neonsdot.h +2 -2
  82. package/include/numkong/maxsim/sapphireamx.h +2 -2
  83. package/include/numkong/maxsim/sme.h +2 -2
  84. package/include/numkong/mesh/haswell.h +2 -2
  85. package/include/numkong/mesh/neon.h +2 -2
  86. package/include/numkong/mesh/neonbfdot.h +2 -2
  87. package/include/numkong/mesh/rvv.h +2 -2
  88. package/include/numkong/mesh/skylake.h +2 -2
  89. package/include/numkong/numkong.h +1 -1
  90. package/include/numkong/probability/haswell.h +2 -2
  91. package/include/numkong/probability/neon.h +2 -2
  92. package/include/numkong/probability/rvv.h +2 -2
  93. package/include/numkong/probability/skylake.h +2 -2
  94. package/include/numkong/reduce/alder.h +2 -2
  95. package/include/numkong/reduce/genoa.h +2 -2
  96. package/include/numkong/reduce/haswell.h +2 -2
  97. package/include/numkong/reduce/icelake.h +2 -2
  98. package/include/numkong/reduce/neon.h +2 -2
  99. package/include/numkong/reduce/neonbfdot.h +2 -2
  100. package/include/numkong/reduce/neonfhm.h +2 -2
  101. package/include/numkong/reduce/neonsdot.h +2 -2
  102. package/include/numkong/reduce/rvv.h +2 -2
  103. package/include/numkong/reduce/sierra.h +2 -2
  104. package/include/numkong/reduce/skylake.h +2 -2
  105. package/include/numkong/scalar/haswell.h +2 -2
  106. package/include/numkong/scalar/loongsonasx.h +2 -2
  107. package/include/numkong/scalar/neon.h +2 -2
  108. package/include/numkong/scalar/neonhalf.h +2 -2
  109. package/include/numkong/scalar/powervsx.h +2 -2
  110. package/include/numkong/scalar/rvv.h +2 -2
  111. package/include/numkong/scalar/sapphire.h +2 -2
  112. package/include/numkong/set/haswell.h +2 -2
  113. package/include/numkong/set/icelake.h +2 -2
  114. package/include/numkong/set/loongsonasx.h +2 -2
  115. package/include/numkong/set/neon.h +2 -2
  116. package/include/numkong/set/powervsx.h +2 -2
  117. package/include/numkong/set/rvv.h +2 -2
  118. package/include/numkong/set/rvvbb.h +2 -2
  119. package/include/numkong/set/sve.h +2 -2
  120. package/include/numkong/sets/haswell.h +2 -2
  121. package/include/numkong/sets/icelake.h +2 -2
  122. package/include/numkong/sets/loongsonasx.h +2 -2
  123. package/include/numkong/sets/neon.h +2 -2
  124. package/include/numkong/sets/powervsx.h +2 -2
  125. package/include/numkong/sets/smebi32.h +2 -2
  126. package/include/numkong/sparse/icelake.h +2 -2
  127. package/include/numkong/sparse/neon.h +2 -2
  128. package/include/numkong/sparse/sve2.h +2 -2
  129. package/include/numkong/sparse/turin.h +2 -2
  130. package/include/numkong/spatial/alder.h +2 -2
  131. package/include/numkong/spatial/diamond.h +2 -2
  132. package/include/numkong/spatial/genoa.h +2 -2
  133. package/include/numkong/spatial/haswell.h +2 -2
  134. package/include/numkong/spatial/icelake.h +2 -2
  135. package/include/numkong/spatial/loongsonasx.h +2 -2
  136. package/include/numkong/spatial/neon.h +2 -2
  137. package/include/numkong/spatial/neonbfdot.h +2 -2
  138. package/include/numkong/spatial/neonfp8.h +2 -2
  139. package/include/numkong/spatial/neonsdot.h +2 -2
  140. package/include/numkong/spatial/powervsx.h +2 -2
  141. package/include/numkong/spatial/rvv.h +2 -2
  142. package/include/numkong/spatial/rvvbf16.h +2 -2
  143. package/include/numkong/spatial/rvvhalf.h +2 -2
  144. package/include/numkong/spatial/sierra.h +2 -2
  145. package/include/numkong/spatial/skylake.h +2 -2
  146. package/include/numkong/spatial/sve.h +2 -2
  147. package/include/numkong/spatial/svebfdot.h +2 -2
  148. package/include/numkong/spatial/svehalf.h +2 -2
  149. package/include/numkong/spatial/svesdot.h +2 -2
  150. package/include/numkong/spatials/alder.h +2 -2
  151. package/include/numkong/spatials/diamond.h +2 -2
  152. package/include/numkong/spatials/genoa.h +2 -2
  153. package/include/numkong/spatials/haswell.h +2 -2
  154. package/include/numkong/spatials/icelake.h +2 -2
  155. package/include/numkong/spatials/loongsonasx.h +2 -2
  156. package/include/numkong/spatials/neon.h +2 -2
  157. package/include/numkong/spatials/neonbfdot.h +2 -2
  158. package/include/numkong/spatials/neonfhm.h +2 -2
  159. package/include/numkong/spatials/neonfp8.h +2 -2
  160. package/include/numkong/spatials/neonsdot.h +2 -2
  161. package/include/numkong/spatials/powervsx.h +2 -2
  162. package/include/numkong/spatials/rvv.h +2 -2
  163. package/include/numkong/spatials/sapphireamx.h +2 -2
  164. package/include/numkong/spatials/sierra.h +2 -2
  165. package/include/numkong/spatials/skylake.h +2 -2
  166. package/include/numkong/spatials/sme.h +2 -2
  167. package/include/numkong/spatials/smef64.h +2 -2
  168. package/include/numkong/trigonometry/haswell.h +2 -2
  169. package/include/numkong/trigonometry/neon.h +2 -2
  170. package/include/numkong/trigonometry/rvv.h +2 -2
  171. package/include/numkong/trigonometry/skylake.h +2 -2
  172. package/include/numkong/types.h +88 -80
  173. package/package.json +7 -7
@@ -30,7 +30,7 @@
30
30
  #ifndef NK_DOT_NEONFP8_H
31
31
  #define NK_DOT_NEONFP8_H
32
32
 
33
- #if NK_TARGET_ARM_
33
+ #if NK_TARGET_ARM64_
34
34
  #if NK_TARGET_NEONFP8
35
35
 
36
36
  #include "numkong/types.h"
@@ -319,5 +319,5 @@ NK_INTERNAL void nk_dot_e3m2x16_finalize_neonfp8(
319
319
  #endif
320
320
 
321
321
  #endif // NK_TARGET_NEONFP8
322
- #endif // NK_TARGET_ARM_
322
+ #endif // NK_TARGET_ARM64_
323
323
  #endif // NK_DOT_NEONFP8_H
@@ -96,7 +96,7 @@
96
96
  #ifndef NK_DOT_NEONSDOT_H
97
97
  #define NK_DOT_NEONSDOT_H
98
98
 
99
- #if NK_TARGET_ARM_
99
+ #if NK_TARGET_ARM64_
100
100
  #if NK_TARGET_NEONSDOT
101
101
 
102
102
  #include "numkong/types.h"
@@ -618,5 +618,5 @@ NK_INTERNAL void nk_dot_e3m2x16_finalize_neonsdot(
618
618
  #endif
619
619
 
620
620
  #endif // NK_TARGET_NEONSDOT
621
- #endif // NK_TARGET_ARM_
621
+ #endif // NK_TARGET_ARM64_
622
622
  #endif // NK_DOT_NEONSDOT_H
@@ -22,7 +22,7 @@
22
22
  #ifndef NK_DOT_RVV_H
23
23
  #define NK_DOT_RVV_H
24
24
 
25
- #if NK_TARGET_RISCV_
25
+ #if NK_TARGET_RISCV64_
26
26
  #if NK_TARGET_RVV
27
27
 
28
28
  #include "numkong/types.h"
@@ -718,5 +718,5 @@ NK_PUBLIC void nk_vdot_f64c_rvv(nk_f64c_t const *a_pairs, nk_f64c_t const *b_pai
718
718
  #endif
719
719
 
720
720
  #endif // NK_TARGET_RVV
721
- #endif // NK_TARGET_RISCV_
721
+ #endif // NK_TARGET_RISCV64_
722
722
  #endif // NK_DOT_RVV_H
@@ -15,7 +15,7 @@
15
15
  #ifndef NK_DOT_RVVBB_H
16
16
  #define NK_DOT_RVVBB_H
17
17
 
18
- #if NK_TARGET_RISCV_
18
+ #if NK_TARGET_RISCV64_
19
19
  #if NK_TARGET_RVVBB
20
20
 
21
21
  #include "numkong/types.h"
@@ -68,5 +68,5 @@ NK_PUBLIC void nk_dot_u1_rvvbb(nk_u1x8_t const *a, nk_u1x8_t const *b, nk_size_t
68
68
  #endif
69
69
 
70
70
  #endif // NK_TARGET_RVVBB
71
- #endif // NK_TARGET_RISCV_
71
+ #endif // NK_TARGET_RISCV64_
72
72
  #endif // NK_DOT_RVVBB_H
@@ -18,7 +18,7 @@
18
18
  #ifndef NK_DOT_RVVBF16_H
19
19
  #define NK_DOT_RVVBF16_H
20
20
 
21
- #if NK_TARGET_RISCV_
21
+ #if NK_TARGET_RISCV64_
22
22
  #if NK_TARGET_RVVBF16
23
23
 
24
24
  #include "numkong/types.h"
@@ -119,5 +119,5 @@ NK_PUBLIC void nk_dot_e5m2_rvvbf16(nk_e5m2_t const *a_scalars, nk_e5m2_t const *
119
119
  #endif
120
120
 
121
121
  #endif // NK_TARGET_RVVBF16
122
- #endif // NK_TARGET_RISCV_
122
+ #endif // NK_TARGET_RISCV64_
123
123
  #endif // NK_DOT_RVVBF16_H
@@ -19,7 +19,7 @@
19
19
  #ifndef NK_DOT_RVVHALF_H
20
20
  #define NK_DOT_RVVHALF_H
21
21
 
22
- #if NK_TARGET_RISCV_
22
+ #if NK_TARGET_RISCV64_
23
23
  #if NK_TARGET_RVVHALF
24
24
 
25
25
  #include "numkong/types.h"
@@ -125,5 +125,5 @@ NK_PUBLIC void nk_dot_e5m2_rvvhalf(nk_e5m2_t const *a_scalars, nk_e5m2_t const *
125
125
  #endif
126
126
 
127
127
  #endif // NK_TARGET_RVVHALF
128
- #endif // NK_TARGET_RISCV_
128
+ #endif // NK_TARGET_RISCV64_
129
129
  #endif // NK_DOT_RVVHALF_H
@@ -32,7 +32,7 @@
32
32
  #ifndef NK_DOT_SAPPHIRE_H
33
33
  #define NK_DOT_SAPPHIRE_H
34
34
 
35
- #if NK_TARGET_X86_
35
+ #if NK_TARGET_X8664_
36
36
  #if NK_TARGET_SAPPHIRE
37
37
 
38
38
  #include "numkong/types.h"
@@ -137,5 +137,5 @@ NK_INTERNAL __m512 nk_flush_f16_to_f32_sapphire_(__m512h acc_f16x32, __m512 sum_
137
137
  #endif
138
138
 
139
139
  #endif // NK_TARGET_SAPPHIRE
140
- #endif // NK_TARGET_X86_
140
+ #endif // NK_TARGET_X8664_
141
141
  #endif // NK_DOT_SAPPHIRE_H
@@ -75,7 +75,7 @@
75
75
  #ifndef NK_DOT_SIERRA_H
76
76
  #define NK_DOT_SIERRA_H
77
77
 
78
- #if NK_TARGET_X86_
78
+ #if NK_TARGET_X8664_
79
79
  #if NK_TARGET_SIERRA
80
80
 
81
81
  #include "numkong/types.h"
@@ -401,5 +401,5 @@ NK_INTERNAL void nk_dot_e2m3x32_finalize_sierra(
401
401
  #endif
402
402
 
403
403
  #endif // NK_TARGET_SIERRA
404
- #endif // NK_TARGET_X86_
404
+ #endif // NK_TARGET_X8664_
405
405
  #endif // NK_DOT_SIERRA_H
@@ -78,7 +78,7 @@
78
78
  #ifndef NK_DOT_SKYLAKE_H
79
79
  #define NK_DOT_SKYLAKE_H
80
80
 
81
- #if NK_TARGET_X86_
81
+ #if NK_TARGET_X8664_
82
82
  #if NK_TARGET_SKYLAKE
83
83
 
84
84
  #include "numkong/cast/skylake.h" // `nk_bf16x16_to_f32x16_skylake_`
@@ -1112,5 +1112,5 @@ NK_INTERNAL void nk_dot_e3m2x64_finalize_skylake(
1112
1112
  #endif
1113
1113
 
1114
1114
  #endif // NK_TARGET_SKYLAKE
1115
- #endif // NK_TARGET_X86_
1115
+ #endif // NK_TARGET_X8664_
1116
1116
  #endif // NK_DOT_SKYLAKE_H
@@ -35,7 +35,7 @@
35
35
  #ifndef NK_DOT_SVE_H
36
36
  #define NK_DOT_SVE_H
37
37
 
38
- #if NK_TARGET_ARM_
38
+ #if NK_TARGET_ARM64_
39
39
  #if NK_TARGET_SVE
40
40
 
41
41
  #include "numkong/types.h" // `nk_f32_t`
@@ -415,5 +415,5 @@ NK_PUBLIC void nk_vdot_f64c_sve(nk_f64c_t const *a_pairs, nk_f64c_t const *b_pai
415
415
  #endif
416
416
 
417
417
  #endif // NK_TARGET_SVE
418
- #endif // NK_TARGET_ARM_
418
+ #endif // NK_TARGET_ARM64_
419
419
  #endif // NK_DOT_SVE_H
@@ -27,7 +27,7 @@
27
27
  #ifndef NK_DOT_SVEBFDOT_H
28
28
  #define NK_DOT_SVEBFDOT_H
29
29
 
30
- #if NK_TARGET_ARM_
30
+ #if NK_TARGET_ARM64_
31
31
  #if NK_TARGET_SVEBFDOT
32
32
 
33
33
  #include "numkong/types.h"
@@ -70,5 +70,5 @@ NK_PUBLIC void nk_dot_bf16_svebfdot(nk_bf16_t const *a_scalars, nk_bf16_t const
70
70
  #endif
71
71
 
72
72
  #endif // NK_TARGET_SVEBFDOT
73
- #endif // NK_TARGET_ARM_
73
+ #endif // NK_TARGET_ARM64_
74
74
  #endif // NK_DOT_SVEBFDOT_H
@@ -29,7 +29,7 @@
29
29
  #ifndef NK_DOT_SVEHALF_H
30
30
  #define NK_DOT_SVEHALF_H
31
31
 
32
- #if NK_TARGET_ARM_
32
+ #if NK_TARGET_ARM64_
33
33
  #if NK_TARGET_SVEHALF
34
34
 
35
35
  #include "numkong/types.h" // `nk_f16_t`
@@ -163,5 +163,5 @@ NK_PUBLIC void nk_vdot_f16c_svehalf(nk_f16c_t const *a_pairs, nk_f16c_t const *b
163
163
  #endif
164
164
 
165
165
  #endif // NK_TARGET_SVEHALF
166
- #endif // NK_TARGET_ARM_
166
+ #endif // NK_TARGET_ARM64_
167
167
  #endif // NK_DOT_SVEHALF_H
@@ -30,7 +30,7 @@
30
30
  #ifndef NK_DOT_SVESDOT_H
31
31
  #define NK_DOT_SVESDOT_H
32
32
 
33
- #if NK_TARGET_ARM_
33
+ #if NK_TARGET_ARM64_
34
34
  #if NK_TARGET_SVESDOT
35
35
 
36
36
  #include "numkong/types.h"
@@ -85,5 +85,5 @@ NK_PUBLIC void nk_dot_u8_svesdot(nk_u8_t const *a_scalars, nk_u8_t const *b_scal
85
85
  #endif
86
86
 
87
87
  #endif // NK_TARGET_SVESDOT
88
- #endif // NK_TARGET_ARM_
88
+ #endif // NK_TARGET_ARM64_
89
89
  #endif // NK_DOT_SVESDOT_H
@@ -12,7 +12,7 @@
12
12
  #ifndef NK_DOTS_ALDER_H
13
13
  #define NK_DOTS_ALDER_H
14
14
 
15
- #if NK_TARGET_X86_
15
+ #if NK_TARGET_X8664_
16
16
  #if NK_TARGET_ALDER
17
17
 
18
18
  #include "numkong/dot/alder.h" // Alder-specific dot product helpers
@@ -113,5 +113,5 @@ nk_define_cross_packed_(dots, e2m3, alder, e2m3, e2m3, f32, nk_b256_vec_t, nk_do
113
113
  #endif
114
114
 
115
115
  #endif // NK_TARGET_ALDER
116
- #endif // NK_TARGET_X86_
116
+ #endif // NK_TARGET_X8664_
117
117
  #endif // NK_DOTS_ALDER_H
@@ -12,7 +12,7 @@
12
12
  #ifndef NK_DOTS_DIAMOND_H
13
13
  #define NK_DOTS_DIAMOND_H
14
14
 
15
- #if NK_TARGET_X86_
15
+ #if NK_TARGET_X8664_
16
16
  #if NK_TARGET_DIAMOND
17
17
 
18
18
  #include "numkong/dot/diamond.h"
@@ -82,5 +82,5 @@ nk_define_cross_packed_(dots, e5m2, diamond, e5m2, e5m2, f32, nk_b512_vec_t, nk_
82
82
  #endif
83
83
 
84
84
  #endif // NK_TARGET_DIAMOND
85
- #endif // NK_TARGET_X86_
85
+ #endif // NK_TARGET_X8664_
86
86
  #endif // NK_DOTS_DIAMOND_H
@@ -9,7 +9,7 @@
9
9
  #ifndef NK_DOTS_GENOA_H
10
10
  #define NK_DOTS_GENOA_H
11
11
 
12
- #if NK_TARGET_X86_
12
+ #if NK_TARGET_X8664_
13
13
  #if NK_TARGET_GENOA
14
14
 
15
15
  #include "numkong/dot/genoa.h"
@@ -96,5 +96,5 @@ nk_define_cross_packed_(dots, e5m2, genoa, e5m2, bf16, f32, nk_b512_vec_t, nk_do
96
96
  #endif
97
97
 
98
98
  #endif // NK_TARGET_GENOA
99
- #endif // NK_TARGET_X86_
99
+ #endif // NK_TARGET_X8664_
100
100
  #endif // NK_DOTS_GENOA_H
@@ -23,7 +23,7 @@
23
23
  #ifndef NK_DOTS_HASWELL_H
24
24
  #define NK_DOTS_HASWELL_H
25
25
 
26
- #if NK_TARGET_X86_
26
+ #if NK_TARGET_X8664_
27
27
  #if NK_TARGET_HASWELL
28
28
 
29
29
  #include "numkong/dot/haswell.h"
@@ -306,5 +306,5 @@ nk_define_cross_packed_(dots, u1, haswell, u1x8, u1x8, u32, nk_b128_vec_t, nk_do
306
306
  #endif
307
307
 
308
308
  #endif // NK_TARGET_HASWELL
309
- #endif // NK_TARGET_X86_
309
+ #endif // NK_TARGET_X8664_
310
310
  #endif // NK_DOTS_HASWELL_H
@@ -21,7 +21,7 @@
21
21
  #ifndef NK_DOTS_ICELAKE_H
22
22
  #define NK_DOTS_ICELAKE_H
23
23
 
24
- #if NK_TARGET_X86_
24
+ #if NK_TARGET_X8664_
25
25
  #if NK_TARGET_ICELAKE
26
26
 
27
27
  #include "numkong/dot/icelake.h"
@@ -176,5 +176,5 @@ nk_define_cross_packed_(dots, u1, icelake, u1x8, u1x8, u32, nk_b512_vec_t, nk_do
176
176
  #endif
177
177
 
178
178
  #endif // NK_TARGET_ICELAKE
179
- #endif // NK_TARGET_X86_
179
+ #endif // NK_TARGET_X8664_
180
180
  #endif // NK_DOTS_ICELAKE_H
@@ -13,7 +13,7 @@
13
13
  #ifndef NK_DOTS_LOONGSONASX_H
14
14
  #define NK_DOTS_LOONGSONASX_H
15
15
 
16
- #if NK_TARGET_LOONGARCH_
16
+ #if NK_TARGET_LOONGARCH64_
17
17
  #if NK_TARGET_LOONGSONASX
18
18
 
19
19
  #include "numkong/dot/loongsonasx.h"
@@ -172,5 +172,5 @@ nk_define_cross_packed_(dots, f16, loongsonasx, f16, f32, f32, nk_b256_vec_t, nk
172
172
  #endif
173
173
 
174
174
  #endif // NK_TARGET_LOONGSONASX
175
- #endif // NK_TARGET_LOONGARCH_
175
+ #endif // NK_TARGET_LOONGARCH64_
176
176
  #endif // NK_DOTS_LOONGSONASX_H
@@ -9,7 +9,7 @@
9
9
  #ifndef NK_DOTS_NEON_H
10
10
  #define NK_DOTS_NEON_H
11
11
 
12
- #if NK_TARGET_ARM_
12
+ #if NK_TARGET_ARM64_
13
13
  #if NK_TARGET_NEON
14
14
 
15
15
  #include "numkong/dot/neon.h"
@@ -119,5 +119,5 @@ nk_define_cross_packed_(dots, f64, neon, f64, f64, f64, nk_b128_vec_t, nk_dot_f6
119
119
  #endif
120
120
 
121
121
  #endif // NK_TARGET_NEON
122
- #endif // NK_TARGET_ARM_
122
+ #endif // NK_TARGET_ARM64_
123
123
  #endif // NK_DOTS_NEON_H
@@ -9,7 +9,7 @@
9
9
  #ifndef NK_DOTS_NEONBFDOT_H
10
10
  #define NK_DOTS_NEONBFDOT_H
11
11
 
12
- #if NK_TARGET_ARM_
12
+ #if NK_TARGET_ARM64_
13
13
  #if NK_TARGET_NEONBFDOT
14
14
 
15
15
  #include "numkong/dot/neonbfdot.h"
@@ -55,5 +55,5 @@ nk_define_cross_packed_(dots, bf16, neonbfdot, bf16, bf16, f32, nk_b128_vec_t, n
55
55
  #endif
56
56
 
57
57
  #endif // NK_TARGET_NEONBFDOT
58
- #endif // NK_TARGET_ARM_
58
+ #endif // NK_TARGET_ARM64_
59
59
  #endif // NK_DOTS_NEONBFDOT_H
@@ -12,7 +12,7 @@
12
12
  #ifndef NK_DOTS_NEONFHM_H
13
13
  #define NK_DOTS_NEONFHM_H
14
14
 
15
- #if NK_TARGET_ARM_
15
+ #if NK_TARGET_ARM64_
16
16
  #if NK_TARGET_NEONFHM
17
17
 
18
18
  #include "numkong/dot/neonfhm.h"
@@ -92,5 +92,5 @@ nk_define_cross_packed_(dots, e5m2, neonfhm, e5m2, e5m2, f32, nk_b128_vec_t, nk_
92
92
  #endif
93
93
 
94
94
  #endif // NK_TARGET_NEONFHM
95
- #endif // NK_TARGET_ARM_
95
+ #endif // NK_TARGET_ARM64_
96
96
  #endif // NK_DOTS_NEONFHM_H
@@ -12,7 +12,7 @@
12
12
  #ifndef NK_DOTS_NEONFP8_H
13
13
  #define NK_DOTS_NEONFP8_H
14
14
 
15
- #if NK_TARGET_ARM_
15
+ #if NK_TARGET_ARM64_
16
16
  #if NK_TARGET_NEONFP8
17
17
 
18
18
  #include "numkong/dot/neonfp8.h"
@@ -95,5 +95,5 @@ nk_define_cross_packed_(dots, e3m2, neonfp8, e3m2, e3m2, f32, nk_b128_vec_t, nk_
95
95
  #endif
96
96
 
97
97
  #endif // NK_TARGET_NEONFP8
98
- #endif // NK_TARGET_ARM_
98
+ #endif // NK_TARGET_ARM64_
99
99
  #endif // NK_DOTS_NEONFP8_H
@@ -9,7 +9,7 @@
9
9
  #ifndef NK_DOTS_NEONSDOT_H
10
10
  #define NK_DOTS_NEONSDOT_H
11
11
 
12
- #if NK_TARGET_ARM_
12
+ #if NK_TARGET_ARM64_
13
13
  #if NK_TARGET_NEONSDOT
14
14
 
15
15
  #include "numkong/dot/neonsdot.h"
@@ -140,5 +140,5 @@ nk_define_cross_packed_(dots, e3m2, neonsdot, e3m2, e3m2, f32, nk_b128_vec_t, nk
140
140
  #endif
141
141
 
142
142
  #endif // NK_TARGET_NEONSDOT
143
- #endif // NK_TARGET_ARM_
143
+ #endif // NK_TARGET_ARM64_
144
144
  #endif // NK_DOTS_NEONSDOT_H
@@ -24,7 +24,7 @@
24
24
  #ifndef NK_DOTS_POWERVSX_H
25
25
  #define NK_DOTS_POWERVSX_H
26
26
 
27
- #if NK_TARGET_POWER_
27
+ #if NK_TARGET_POWER64_
28
28
  #if NK_TARGET_POWERVSX
29
29
 
30
30
  #include "numkong/dot/powervsx.h"
@@ -190,5 +190,5 @@ nk_define_cross_packed_(dots, f64, powervsx, f64, f64, f64, nk_b128_vec_t, nk_do
190
190
  #endif
191
191
 
192
192
  #endif // NK_TARGET_POWERVSX
193
- #endif // NK_TARGET_POWER_
193
+ #endif // NK_TARGET_POWER64_
194
194
  #endif // NK_DOTS_POWERVSX_H
@@ -43,7 +43,7 @@
43
43
  #ifndef NK_DOTS_RVV_H
44
44
  #define NK_DOTS_RVV_H
45
45
 
46
- #if NK_TARGET_RISCV_
46
+ #if NK_TARGET_RISCV64_
47
47
  #if NK_TARGET_RVV
48
48
 
49
49
  #include "numkong/types.h"
@@ -2589,5 +2589,5 @@ NK_PUBLIC void nk_dots_symmetric_e5m2_rvv(nk_e5m2_t const *vectors, nk_size_t ve
2589
2589
  #endif
2590
2590
 
2591
2591
  #endif // NK_TARGET_RVV
2592
- #endif // NK_TARGET_RISCV_
2592
+ #endif // NK_TARGET_RISCV64_
2593
2593
  #endif // NK_DOTS_RVV_H
@@ -69,7 +69,7 @@
69
69
  #ifndef NK_DOTS_SAPPHIREAMX_H
70
70
  #define NK_DOTS_SAPPHIREAMX_H
71
71
 
72
- #if NK_TARGET_X86_
72
+ #if NK_TARGET_X8664_
73
73
  #if NK_TARGET_SAPPHIREAMX
74
74
 
75
75
  #include "numkong/cast/icelake.h" // For FP8 ↔ BF16 conversions
@@ -4013,5 +4013,5 @@ NK_PUBLIC void nk_dots_symmetric_e3m2_sapphireamx(
4013
4013
  #endif
4014
4014
 
4015
4015
  #endif // NK_TARGET_SAPPHIREAMX
4016
- #endif // NK_TARGET_X86_
4016
+ #endif // NK_TARGET_X8664_
4017
4017
  #endif // NK_DOTS_SAPPHIREAMX_H
@@ -13,7 +13,7 @@
13
13
  #ifndef NK_DOTS_SIERRA_H
14
14
  #define NK_DOTS_SIERRA_H
15
15
 
16
- #if NK_TARGET_X86_
16
+ #if NK_TARGET_X8664_
17
17
  #if NK_TARGET_SIERRA
18
18
 
19
19
  #include "numkong/dot/sierra.h" // Sierra-specific dot product helpers
@@ -96,5 +96,5 @@ nk_define_cross_packed_(dots, e2m3, sierra, e2m3, e2m3, f32, nk_b256_vec_t, nk_d
96
96
  #endif
97
97
 
98
98
  #endif // NK_TARGET_SIERRA
99
- #endif // NK_TARGET_X86_
99
+ #endif // NK_TARGET_X8664_
100
100
  #endif // NK_DOTS_SIERRA_H
@@ -21,7 +21,7 @@
21
21
  #ifndef NK_DOTS_SKYLAKE_H
22
22
  #define NK_DOTS_SKYLAKE_H
23
23
 
24
- #if NK_TARGET_X86_
24
+ #if NK_TARGET_X8664_
25
25
  #if NK_TARGET_SKYLAKE
26
26
 
27
27
  #include "numkong/dot/skylake.h"
@@ -203,5 +203,5 @@ nk_define_cross_packed_(dots, e3m2, skylake, e3m2, e3m2, f32, nk_b512_vec_t, nk_
203
203
  #endif
204
204
 
205
205
  #endif // NK_TARGET_SKYLAKE
206
- #endif // NK_TARGET_X86_
206
+ #endif // NK_TARGET_X8664_
207
207
  #endif // NK_DOTS_SKYLAKE_H
@@ -58,7 +58,7 @@
58
58
  #ifndef NK_DOTS_SME_H
59
59
  #define NK_DOTS_SME_H
60
60
 
61
- #if NK_TARGET_ARM_
61
+ #if NK_TARGET_ARM64_
62
62
  #if NK_TARGET_SME
63
63
 
64
64
  #include "numkong/types.h"
@@ -1520,8 +1520,8 @@ NK_PUBLIC svfloat16_t nk_e5m2x_to_f16x_ssve_(svbool_t predicate_b16x, svuint8_t
1520
1520
  * Converts `e4m3` → `f16` on-the-fly for A, B is pre-converted during packing.
1521
1521
  */
1522
1522
  __arm_locally_streaming __arm_new("za") static void nk_dots_packed_e4m3_sme_streaming_( //
1523
- nk_e4m3_t const *a, void const *b_packed, nk_f32_t *c, //
1524
- nk_size_t rows, nk_size_t columns, nk_size_t depth, //
1523
+ nk_e4m3_t const *a, void const *b_packed, nk_f32_t *c, //
1524
+ nk_size_t rows, nk_size_t columns, nk_size_t depth, //
1525
1525
  nk_size_t a_stride_elements, nk_size_t c_stride_elements) {
1526
1526
 
1527
1527
  nk_dots_sme_packed_header_t const *header = (nk_dots_sme_packed_header_t const *)b_packed;
@@ -2032,8 +2032,8 @@ NK_PUBLIC void nk_dots_symmetric_e4m3_sme(nk_e4m3_t const *vectors, nk_size_t ve
2032
2032
  * Converts `e5m2` → `f16` on-the-fly for A, B is pre-converted during packing.
2033
2033
  */
2034
2034
  __arm_locally_streaming __arm_new("za") static void nk_dots_packed_e5m2_sme_streaming_( //
2035
- nk_e5m2_t const *a, void const *b_packed, nk_f32_t *c, //
2036
- nk_size_t rows, nk_size_t columns, nk_size_t depth, //
2035
+ nk_e5m2_t const *a, void const *b_packed, nk_f32_t *c, //
2036
+ nk_size_t rows, nk_size_t columns, nk_size_t depth, //
2037
2037
  nk_size_t a_stride_elements, nk_size_t c_stride_elements) {
2038
2038
 
2039
2039
  nk_dots_sme_packed_header_t const *header = (nk_dots_sme_packed_header_t const *)b_packed;
@@ -2491,8 +2491,8 @@ NK_PUBLIC svint8_t nk_e2m3x_to_i8x_ssve_(svbool_t predicate_b8x, svuint8_t raw_b
2491
2491
  * Accumulates in `i32` via `svmopa_za32_s8_m`, then converts to `f32` with 1/256 scaling.
2492
2492
  */
2493
2493
  __arm_locally_streaming __arm_new("za") static void nk_dots_packed_e2m3_sme_streaming_( //
2494
- nk_e2m3_t const *a, void const *b_packed, nk_f32_t *c, //
2495
- nk_size_t rows, nk_size_t columns, nk_size_t depth, //
2494
+ nk_e2m3_t const *a, void const *b_packed, nk_f32_t *c, //
2495
+ nk_size_t rows, nk_size_t columns, nk_size_t depth, //
2496
2496
  nk_size_t a_stride_elements, nk_size_t c_stride_elements) {
2497
2497
 
2498
2498
  nk_dots_sme_packed_header_t const *header = (nk_dots_sme_packed_header_t const *)b_packed;
@@ -3013,8 +3013,8 @@ NK_PUBLIC svfloat16_t nk_e3m2x_to_f16x_ssve_(svbool_t predicate_b16x, svuint8_t
3013
3013
  * Converts `e3m2` → `f16` on-the-fly for A, B is pre-converted during packing.
3014
3014
  */
3015
3015
  __arm_locally_streaming __arm_new("za") static void nk_dots_packed_e3m2_sme_streaming_( //
3016
- nk_e3m2_t const *a, void const *b_packed, nk_f32_t *c, //
3017
- nk_size_t rows, nk_size_t columns, nk_size_t depth, //
3016
+ nk_e3m2_t const *a, void const *b_packed, nk_f32_t *c, //
3017
+ nk_size_t rows, nk_size_t columns, nk_size_t depth, //
3018
3018
  nk_size_t a_stride_elements, nk_size_t c_stride_elements) {
3019
3019
 
3020
3020
  nk_dots_sme_packed_header_t const *header = (nk_dots_sme_packed_header_t const *)b_packed;
@@ -5005,5 +5005,5 @@ NK_PUBLIC void nk_dots_symmetric_i4_sme(nk_i4x2_t const *vectors, nk_size_t vect
5005
5005
  #endif
5006
5006
 
5007
5007
  #endif // NK_TARGET_SME
5008
- #endif // NK_TARGET_ARM_
5008
+ #endif // NK_TARGET_ARM64_
5009
5009
  #endif // NK_DOTS_SME_H
@@ -13,7 +13,7 @@
13
13
  #ifndef NK_DOTS_SMEBI32_H
14
14
  #define NK_DOTS_SMEBI32_H
15
15
 
16
- #if NK_TARGET_ARM_
16
+ #if NK_TARGET_ARM64_
17
17
  #if NK_TARGET_SMEBI32
18
18
 
19
19
  #include "numkong/types.h"
@@ -470,5 +470,5 @@ NK_PUBLIC void nk_dots_symmetric_u1_smebi32(nk_u1x8_t const *vectors, nk_size_t
470
470
  #endif
471
471
 
472
472
  #endif // NK_TARGET_SMEBI32
473
- #endif // NK_TARGET_ARM_
473
+ #endif // NK_TARGET_ARM64_
474
474
  #endif // NK_DOTS_SMEBI32_H
@@ -34,7 +34,7 @@
34
34
  #ifndef NK_DOTS_SMEF64_H
35
35
  #define NK_DOTS_SMEF64_H
36
36
 
37
- #if NK_TARGET_ARM_
37
+ #if NK_TARGET_ARM64_
38
38
  #if NK_TARGET_SME
39
39
 
40
40
  #include "numkong/types.h"
@@ -1319,5 +1319,5 @@ NK_PUBLIC void nk_dots_packed_f64_smef64(nk_f64_t const *a, void const *b_packed
1319
1319
  #endif
1320
1320
 
1321
1321
  #endif // NK_TARGET_SME
1322
- #endif // NK_TARGET_ARM_
1322
+ #endif // NK_TARGET_ARM64_
1323
1323
  #endif // NK_DOTS_SMEF64_H
@@ -26,7 +26,7 @@
26
26
  #ifndef NK_DOTS_SMEHALF_H
27
27
  #define NK_DOTS_SMEHALF_H
28
28
 
29
- #if NK_TARGET_ARM_
29
+ #if NK_TARGET_ARM64_
30
30
  #if NK_TARGET_SMEHALF
31
31
 
32
32
  #if defined(__cplusplus)
@@ -42,6 +42,6 @@ extern "C" {
42
42
  #endif
43
43
 
44
44
  #endif // NK_TARGET_SMEHALF
45
- #endif // NK_TARGET_ARM_
45
+ #endif // NK_TARGET_ARM64_
46
46
 
47
47
  #endif // NK_DOTS_SMEHALF_H
@@ -22,7 +22,7 @@
22
22
  #ifndef NK_EACH_HASWELL_H
23
23
  #define NK_EACH_HASWELL_H
24
24
 
25
- #if NK_TARGET_X86_
25
+ #if NK_TARGET_X8664_
26
26
  #if NK_TARGET_HASWELL
27
27
 
28
28
  #include "numkong/types.h"
@@ -1654,5 +1654,5 @@ NK_PUBLIC void nk_each_fma_f64c_haswell(nk_f64c_t const *a, nk_f64c_t const *b,
1654
1654
  #endif
1655
1655
 
1656
1656
  #endif // NK_TARGET_HASWELL
1657
- #endif // NK_TARGET_X86_
1657
+ #endif // NK_TARGET_X8664_
1658
1658
  #endif // NK_EACH_HASWELL_H
@@ -23,7 +23,7 @@
23
23
  #ifndef NK_EACH_ICELAKE_H
24
24
  #define NK_EACH_ICELAKE_H
25
25
 
26
- #if NK_TARGET_X86_
26
+ #if NK_TARGET_X8664_
27
27
  #if NK_TARGET_ICELAKE
28
28
 
29
29
  #include "numkong/types.h"
@@ -268,5 +268,5 @@ nk_each_sum_u64_icelake_cycle:
268
268
  #endif
269
269
 
270
270
  #endif // NK_TARGET_ICELAKE
271
- #endif // NK_TARGET_X86_
271
+ #endif // NK_TARGET_X8664_
272
272
  #endif // NK_EACH_ICELAKE_H
@@ -31,7 +31,7 @@
31
31
  #ifndef NK_EACH_NEON_H
32
32
  #define NK_EACH_NEON_H
33
33
 
34
- #if NK_TARGET_ARM_
34
+ #if NK_TARGET_ARM64_
35
35
  #if NK_TARGET_NEON
36
36
 
37
37
  #include "numkong/types.h"
@@ -1134,5 +1134,5 @@ NK_PUBLIC void nk_each_sum_i8_neon(nk_i8_t const *a, nk_i8_t const *b, nk_size_t
1134
1134
  #endif
1135
1135
 
1136
1136
  #endif // NK_TARGET_NEON
1137
- #endif // NK_TARGET_ARM_
1137
+ #endif // NK_TARGET_ARM64_
1138
1138
  #endif // NK_EACH_NEON_H
@@ -31,7 +31,7 @@
31
31
  #ifndef NK_EACH_NEONBFDOT_H
32
32
  #define NK_EACH_NEONBFDOT_H
33
33
 
34
- #if NK_TARGET_ARM_
34
+ #if NK_TARGET_ARM64_
35
35
  #if NK_TARGET_NEONBFDOT
36
36
 
37
37
  #include "numkong/types.h"
@@ -207,5 +207,5 @@ NK_PUBLIC void nk_each_fma_bf16_neonbfdot( //
207
207
  #endif
208
208
 
209
209
  #endif // NK_TARGET_NEONBFDOT
210
- #endif // NK_TARGET_ARM_
210
+ #endif // NK_TARGET_ARM64_
211
211
  #endif // NK_EACH_NEONBFDOT_H