numkong 7.4.1 → 7.4.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +86 -130
- package/binding.gyp +16 -0
- package/c/numkong.c +1 -1
- package/include/numkong/attention/sapphireamx.h +2 -2
- package/include/numkong/attention/sme.h +2 -2
- package/include/numkong/capabilities.h +47 -47
- package/include/numkong/cast/diamond.h +2 -2
- package/include/numkong/cast/haswell.h +2 -2
- package/include/numkong/cast/icelake.h +2 -2
- package/include/numkong/cast/loongsonasx.h +2 -2
- package/include/numkong/cast/neon.h +2 -2
- package/include/numkong/cast/powervsx.h +2 -2
- package/include/numkong/cast/rvv.h +2 -2
- package/include/numkong/cast/sapphire.h +2 -2
- package/include/numkong/cast/skylake.h +2 -2
- package/include/numkong/curved/genoa.h +2 -2
- package/include/numkong/curved/haswell.h +2 -2
- package/include/numkong/curved/neon.h +2 -2
- package/include/numkong/curved/neonbfdot.h +2 -2
- package/include/numkong/curved/rvv.h +2 -2
- package/include/numkong/curved/skylake.h +2 -2
- package/include/numkong/curved/smef64.h +2 -2
- package/include/numkong/dot/alder.h +2 -2
- package/include/numkong/dot/diamond.h +2 -2
- package/include/numkong/dot/genoa.h +2 -2
- package/include/numkong/dot/haswell.h +2 -2
- package/include/numkong/dot/icelake.h +2 -2
- package/include/numkong/dot/loongsonasx.h +2 -2
- package/include/numkong/dot/neon.h +2 -2
- package/include/numkong/dot/neonbfdot.h +2 -2
- package/include/numkong/dot/neonfhm.h +2 -2
- package/include/numkong/dot/neonfp8.h +2 -2
- package/include/numkong/dot/neonsdot.h +2 -2
- package/include/numkong/dot/rvv.h +2 -2
- package/include/numkong/dot/rvvbb.h +2 -2
- package/include/numkong/dot/rvvbf16.h +2 -2
- package/include/numkong/dot/rvvhalf.h +2 -2
- package/include/numkong/dot/sapphire.h +2 -2
- package/include/numkong/dot/sierra.h +2 -2
- package/include/numkong/dot/skylake.h +2 -2
- package/include/numkong/dot/sve.h +2 -2
- package/include/numkong/dot/svebfdot.h +2 -2
- package/include/numkong/dot/svehalf.h +2 -2
- package/include/numkong/dot/svesdot.h +2 -2
- package/include/numkong/dots/alder.h +2 -2
- package/include/numkong/dots/diamond.h +2 -2
- package/include/numkong/dots/genoa.h +2 -2
- package/include/numkong/dots/haswell.h +2 -2
- package/include/numkong/dots/icelake.h +2 -2
- package/include/numkong/dots/loongsonasx.h +2 -2
- package/include/numkong/dots/neon.h +2 -2
- package/include/numkong/dots/neonbfdot.h +2 -2
- package/include/numkong/dots/neonfhm.h +2 -2
- package/include/numkong/dots/neonfp8.h +2 -2
- package/include/numkong/dots/neonsdot.h +2 -2
- package/include/numkong/dots/powervsx.h +2 -2
- package/include/numkong/dots/rvv.h +2 -2
- package/include/numkong/dots/sapphireamx.h +2 -2
- package/include/numkong/dots/sierra.h +2 -2
- package/include/numkong/dots/skylake.h +2 -2
- package/include/numkong/dots/sme.h +10 -10
- package/include/numkong/dots/smebi32.h +2 -2
- package/include/numkong/dots/smef64.h +2 -2
- package/include/numkong/dots/smehalf.h +2 -2
- package/include/numkong/each/haswell.h +2 -2
- package/include/numkong/each/icelake.h +2 -2
- package/include/numkong/each/neon.h +2 -2
- package/include/numkong/each/neonbfdot.h +2 -2
- package/include/numkong/each/neonhalf.h +2 -2
- package/include/numkong/each/rvv.h +2 -2
- package/include/numkong/each/sapphire.h +2 -2
- package/include/numkong/each/skylake.h +2 -2
- package/include/numkong/geospatial/haswell.h +2 -2
- package/include/numkong/geospatial/neon.h +2 -2
- package/include/numkong/geospatial/rvv.h +2 -2
- package/include/numkong/geospatial/skylake.h +2 -2
- package/include/numkong/maxsim/alder.h +2 -2
- package/include/numkong/maxsim/genoa.h +2 -2
- package/include/numkong/maxsim/haswell.h +2 -2
- package/include/numkong/maxsim/icelake.h +2 -2
- package/include/numkong/maxsim/neonsdot.h +2 -2
- package/include/numkong/maxsim/sapphireamx.h +2 -2
- package/include/numkong/maxsim/sme.h +2 -2
- package/include/numkong/mesh/haswell.h +2 -2
- package/include/numkong/mesh/neon.h +2 -2
- package/include/numkong/mesh/neonbfdot.h +2 -2
- package/include/numkong/mesh/rvv.h +2 -2
- package/include/numkong/mesh/skylake.h +2 -2
- package/include/numkong/numkong.h +1 -1
- package/include/numkong/probability/haswell.h +2 -2
- package/include/numkong/probability/neon.h +2 -2
- package/include/numkong/probability/rvv.h +2 -2
- package/include/numkong/probability/skylake.h +2 -2
- package/include/numkong/reduce/alder.h +2 -2
- package/include/numkong/reduce/genoa.h +2 -2
- package/include/numkong/reduce/haswell.h +2 -2
- package/include/numkong/reduce/icelake.h +2 -2
- package/include/numkong/reduce/neon.h +2 -2
- package/include/numkong/reduce/neonbfdot.h +2 -2
- package/include/numkong/reduce/neonfhm.h +2 -2
- package/include/numkong/reduce/neonsdot.h +2 -2
- package/include/numkong/reduce/rvv.h +2 -2
- package/include/numkong/reduce/sierra.h +2 -2
- package/include/numkong/reduce/skylake.h +2 -2
- package/include/numkong/scalar/haswell.h +2 -2
- package/include/numkong/scalar/loongsonasx.h +2 -2
- package/include/numkong/scalar/neon.h +2 -2
- package/include/numkong/scalar/neonhalf.h +2 -2
- package/include/numkong/scalar/powervsx.h +2 -2
- package/include/numkong/scalar/rvv.h +2 -2
- package/include/numkong/scalar/sapphire.h +2 -2
- package/include/numkong/set/haswell.h +2 -2
- package/include/numkong/set/icelake.h +2 -2
- package/include/numkong/set/loongsonasx.h +2 -2
- package/include/numkong/set/neon.h +2 -2
- package/include/numkong/set/powervsx.h +2 -2
- package/include/numkong/set/rvv.h +2 -2
- package/include/numkong/set/rvvbb.h +2 -2
- package/include/numkong/set/sve.h +2 -2
- package/include/numkong/sets/haswell.h +2 -2
- package/include/numkong/sets/icelake.h +2 -2
- package/include/numkong/sets/loongsonasx.h +2 -2
- package/include/numkong/sets/neon.h +2 -2
- package/include/numkong/sets/powervsx.h +2 -2
- package/include/numkong/sets/smebi32.h +2 -2
- package/include/numkong/sparse/icelake.h +2 -2
- package/include/numkong/sparse/neon.h +2 -2
- package/include/numkong/sparse/sve2.h +2 -2
- package/include/numkong/sparse/turin.h +2 -2
- package/include/numkong/spatial/alder.h +2 -2
- package/include/numkong/spatial/diamond.h +2 -2
- package/include/numkong/spatial/genoa.h +2 -2
- package/include/numkong/spatial/haswell.h +2 -2
- package/include/numkong/spatial/icelake.h +2 -2
- package/include/numkong/spatial/loongsonasx.h +2 -2
- package/include/numkong/spatial/neon.h +2 -2
- package/include/numkong/spatial/neonbfdot.h +2 -2
- package/include/numkong/spatial/neonfp8.h +2 -2
- package/include/numkong/spatial/neonsdot.h +2 -2
- package/include/numkong/spatial/powervsx.h +2 -2
- package/include/numkong/spatial/rvv.h +2 -2
- package/include/numkong/spatial/rvvbf16.h +2 -2
- package/include/numkong/spatial/rvvhalf.h +2 -2
- package/include/numkong/spatial/sierra.h +2 -2
- package/include/numkong/spatial/skylake.h +2 -2
- package/include/numkong/spatial/sve.h +2 -2
- package/include/numkong/spatial/svebfdot.h +2 -2
- package/include/numkong/spatial/svehalf.h +2 -2
- package/include/numkong/spatial/svesdot.h +2 -2
- package/include/numkong/spatials/alder.h +2 -2
- package/include/numkong/spatials/diamond.h +2 -2
- package/include/numkong/spatials/genoa.h +2 -2
- package/include/numkong/spatials/haswell.h +2 -2
- package/include/numkong/spatials/icelake.h +2 -2
- package/include/numkong/spatials/loongsonasx.h +2 -2
- package/include/numkong/spatials/neon.h +2 -2
- package/include/numkong/spatials/neonbfdot.h +2 -2
- package/include/numkong/spatials/neonfhm.h +2 -2
- package/include/numkong/spatials/neonfp8.h +2 -2
- package/include/numkong/spatials/neonsdot.h +2 -2
- package/include/numkong/spatials/powervsx.h +2 -2
- package/include/numkong/spatials/rvv.h +2 -2
- package/include/numkong/spatials/sapphireamx.h +2 -2
- package/include/numkong/spatials/sierra.h +2 -2
- package/include/numkong/spatials/skylake.h +2 -2
- package/include/numkong/spatials/sme.h +2 -2
- package/include/numkong/spatials/smef64.h +2 -2
- package/include/numkong/trigonometry/haswell.h +2 -2
- package/include/numkong/trigonometry/neon.h +2 -2
- package/include/numkong/trigonometry/rvv.h +2 -2
- package/include/numkong/trigonometry/skylake.h +2 -2
- package/include/numkong/types.h +88 -80
- package/package.json +7 -7
|
@@ -30,7 +30,7 @@
|
|
|
30
30
|
#ifndef NK_DOT_NEONFP8_H
|
|
31
31
|
#define NK_DOT_NEONFP8_H
|
|
32
32
|
|
|
33
|
-
#if
|
|
33
|
+
#if NK_TARGET_ARM64_
|
|
34
34
|
#if NK_TARGET_NEONFP8
|
|
35
35
|
|
|
36
36
|
#include "numkong/types.h"
|
|
@@ -319,5 +319,5 @@ NK_INTERNAL void nk_dot_e3m2x16_finalize_neonfp8(
|
|
|
319
319
|
#endif
|
|
320
320
|
|
|
321
321
|
#endif // NK_TARGET_NEONFP8
|
|
322
|
-
#endif //
|
|
322
|
+
#endif // NK_TARGET_ARM64_
|
|
323
323
|
#endif // NK_DOT_NEONFP8_H
|
|
@@ -96,7 +96,7 @@
|
|
|
96
96
|
#ifndef NK_DOT_NEONSDOT_H
|
|
97
97
|
#define NK_DOT_NEONSDOT_H
|
|
98
98
|
|
|
99
|
-
#if
|
|
99
|
+
#if NK_TARGET_ARM64_
|
|
100
100
|
#if NK_TARGET_NEONSDOT
|
|
101
101
|
|
|
102
102
|
#include "numkong/types.h"
|
|
@@ -618,5 +618,5 @@ NK_INTERNAL void nk_dot_e3m2x16_finalize_neonsdot(
|
|
|
618
618
|
#endif
|
|
619
619
|
|
|
620
620
|
#endif // NK_TARGET_NEONSDOT
|
|
621
|
-
#endif //
|
|
621
|
+
#endif // NK_TARGET_ARM64_
|
|
622
622
|
#endif // NK_DOT_NEONSDOT_H
|
|
@@ -22,7 +22,7 @@
|
|
|
22
22
|
#ifndef NK_DOT_RVV_H
|
|
23
23
|
#define NK_DOT_RVV_H
|
|
24
24
|
|
|
25
|
-
#if
|
|
25
|
+
#if NK_TARGET_RISCV64_
|
|
26
26
|
#if NK_TARGET_RVV
|
|
27
27
|
|
|
28
28
|
#include "numkong/types.h"
|
|
@@ -718,5 +718,5 @@ NK_PUBLIC void nk_vdot_f64c_rvv(nk_f64c_t const *a_pairs, nk_f64c_t const *b_pai
|
|
|
718
718
|
#endif
|
|
719
719
|
|
|
720
720
|
#endif // NK_TARGET_RVV
|
|
721
|
-
#endif //
|
|
721
|
+
#endif // NK_TARGET_RISCV64_
|
|
722
722
|
#endif // NK_DOT_RVV_H
|
|
@@ -15,7 +15,7 @@
|
|
|
15
15
|
#ifndef NK_DOT_RVVBB_H
|
|
16
16
|
#define NK_DOT_RVVBB_H
|
|
17
17
|
|
|
18
|
-
#if
|
|
18
|
+
#if NK_TARGET_RISCV64_
|
|
19
19
|
#if NK_TARGET_RVVBB
|
|
20
20
|
|
|
21
21
|
#include "numkong/types.h"
|
|
@@ -68,5 +68,5 @@ NK_PUBLIC void nk_dot_u1_rvvbb(nk_u1x8_t const *a, nk_u1x8_t const *b, nk_size_t
|
|
|
68
68
|
#endif
|
|
69
69
|
|
|
70
70
|
#endif // NK_TARGET_RVVBB
|
|
71
|
-
#endif //
|
|
71
|
+
#endif // NK_TARGET_RISCV64_
|
|
72
72
|
#endif // NK_DOT_RVVBB_H
|
|
@@ -18,7 +18,7 @@
|
|
|
18
18
|
#ifndef NK_DOT_RVVBF16_H
|
|
19
19
|
#define NK_DOT_RVVBF16_H
|
|
20
20
|
|
|
21
|
-
#if
|
|
21
|
+
#if NK_TARGET_RISCV64_
|
|
22
22
|
#if NK_TARGET_RVVBF16
|
|
23
23
|
|
|
24
24
|
#include "numkong/types.h"
|
|
@@ -119,5 +119,5 @@ NK_PUBLIC void nk_dot_e5m2_rvvbf16(nk_e5m2_t const *a_scalars, nk_e5m2_t const *
|
|
|
119
119
|
#endif
|
|
120
120
|
|
|
121
121
|
#endif // NK_TARGET_RVVBF16
|
|
122
|
-
#endif //
|
|
122
|
+
#endif // NK_TARGET_RISCV64_
|
|
123
123
|
#endif // NK_DOT_RVVBF16_H
|
|
@@ -19,7 +19,7 @@
|
|
|
19
19
|
#ifndef NK_DOT_RVVHALF_H
|
|
20
20
|
#define NK_DOT_RVVHALF_H
|
|
21
21
|
|
|
22
|
-
#if
|
|
22
|
+
#if NK_TARGET_RISCV64_
|
|
23
23
|
#if NK_TARGET_RVVHALF
|
|
24
24
|
|
|
25
25
|
#include "numkong/types.h"
|
|
@@ -125,5 +125,5 @@ NK_PUBLIC void nk_dot_e5m2_rvvhalf(nk_e5m2_t const *a_scalars, nk_e5m2_t const *
|
|
|
125
125
|
#endif
|
|
126
126
|
|
|
127
127
|
#endif // NK_TARGET_RVVHALF
|
|
128
|
-
#endif //
|
|
128
|
+
#endif // NK_TARGET_RISCV64_
|
|
129
129
|
#endif // NK_DOT_RVVHALF_H
|
|
@@ -32,7 +32,7 @@
|
|
|
32
32
|
#ifndef NK_DOT_SAPPHIRE_H
|
|
33
33
|
#define NK_DOT_SAPPHIRE_H
|
|
34
34
|
|
|
35
|
-
#if
|
|
35
|
+
#if NK_TARGET_X8664_
|
|
36
36
|
#if NK_TARGET_SAPPHIRE
|
|
37
37
|
|
|
38
38
|
#include "numkong/types.h"
|
|
@@ -137,5 +137,5 @@ NK_INTERNAL __m512 nk_flush_f16_to_f32_sapphire_(__m512h acc_f16x32, __m512 sum_
|
|
|
137
137
|
#endif
|
|
138
138
|
|
|
139
139
|
#endif // NK_TARGET_SAPPHIRE
|
|
140
|
-
#endif //
|
|
140
|
+
#endif // NK_TARGET_X8664_
|
|
141
141
|
#endif // NK_DOT_SAPPHIRE_H
|
|
@@ -75,7 +75,7 @@
|
|
|
75
75
|
#ifndef NK_DOT_SIERRA_H
|
|
76
76
|
#define NK_DOT_SIERRA_H
|
|
77
77
|
|
|
78
|
-
#if
|
|
78
|
+
#if NK_TARGET_X8664_
|
|
79
79
|
#if NK_TARGET_SIERRA
|
|
80
80
|
|
|
81
81
|
#include "numkong/types.h"
|
|
@@ -401,5 +401,5 @@ NK_INTERNAL void nk_dot_e2m3x32_finalize_sierra(
|
|
|
401
401
|
#endif
|
|
402
402
|
|
|
403
403
|
#endif // NK_TARGET_SIERRA
|
|
404
|
-
#endif //
|
|
404
|
+
#endif // NK_TARGET_X8664_
|
|
405
405
|
#endif // NK_DOT_SIERRA_H
|
|
@@ -78,7 +78,7 @@
|
|
|
78
78
|
#ifndef NK_DOT_SKYLAKE_H
|
|
79
79
|
#define NK_DOT_SKYLAKE_H
|
|
80
80
|
|
|
81
|
-
#if
|
|
81
|
+
#if NK_TARGET_X8664_
|
|
82
82
|
#if NK_TARGET_SKYLAKE
|
|
83
83
|
|
|
84
84
|
#include "numkong/cast/skylake.h" // `nk_bf16x16_to_f32x16_skylake_`
|
|
@@ -1112,5 +1112,5 @@ NK_INTERNAL void nk_dot_e3m2x64_finalize_skylake(
|
|
|
1112
1112
|
#endif
|
|
1113
1113
|
|
|
1114
1114
|
#endif // NK_TARGET_SKYLAKE
|
|
1115
|
-
#endif //
|
|
1115
|
+
#endif // NK_TARGET_X8664_
|
|
1116
1116
|
#endif // NK_DOT_SKYLAKE_H
|
|
@@ -35,7 +35,7 @@
|
|
|
35
35
|
#ifndef NK_DOT_SVE_H
|
|
36
36
|
#define NK_DOT_SVE_H
|
|
37
37
|
|
|
38
|
-
#if
|
|
38
|
+
#if NK_TARGET_ARM64_
|
|
39
39
|
#if NK_TARGET_SVE
|
|
40
40
|
|
|
41
41
|
#include "numkong/types.h" // `nk_f32_t`
|
|
@@ -415,5 +415,5 @@ NK_PUBLIC void nk_vdot_f64c_sve(nk_f64c_t const *a_pairs, nk_f64c_t const *b_pai
|
|
|
415
415
|
#endif
|
|
416
416
|
|
|
417
417
|
#endif // NK_TARGET_SVE
|
|
418
|
-
#endif //
|
|
418
|
+
#endif // NK_TARGET_ARM64_
|
|
419
419
|
#endif // NK_DOT_SVE_H
|
|
@@ -27,7 +27,7 @@
|
|
|
27
27
|
#ifndef NK_DOT_SVEBFDOT_H
|
|
28
28
|
#define NK_DOT_SVEBFDOT_H
|
|
29
29
|
|
|
30
|
-
#if
|
|
30
|
+
#if NK_TARGET_ARM64_
|
|
31
31
|
#if NK_TARGET_SVEBFDOT
|
|
32
32
|
|
|
33
33
|
#include "numkong/types.h"
|
|
@@ -70,5 +70,5 @@ NK_PUBLIC void nk_dot_bf16_svebfdot(nk_bf16_t const *a_scalars, nk_bf16_t const
|
|
|
70
70
|
#endif
|
|
71
71
|
|
|
72
72
|
#endif // NK_TARGET_SVEBFDOT
|
|
73
|
-
#endif //
|
|
73
|
+
#endif // NK_TARGET_ARM64_
|
|
74
74
|
#endif // NK_DOT_SVEBFDOT_H
|
|
@@ -29,7 +29,7 @@
|
|
|
29
29
|
#ifndef NK_DOT_SVEHALF_H
|
|
30
30
|
#define NK_DOT_SVEHALF_H
|
|
31
31
|
|
|
32
|
-
#if
|
|
32
|
+
#if NK_TARGET_ARM64_
|
|
33
33
|
#if NK_TARGET_SVEHALF
|
|
34
34
|
|
|
35
35
|
#include "numkong/types.h" // `nk_f16_t`
|
|
@@ -163,5 +163,5 @@ NK_PUBLIC void nk_vdot_f16c_svehalf(nk_f16c_t const *a_pairs, nk_f16c_t const *b
|
|
|
163
163
|
#endif
|
|
164
164
|
|
|
165
165
|
#endif // NK_TARGET_SVEHALF
|
|
166
|
-
#endif //
|
|
166
|
+
#endif // NK_TARGET_ARM64_
|
|
167
167
|
#endif // NK_DOT_SVEHALF_H
|
|
@@ -30,7 +30,7 @@
|
|
|
30
30
|
#ifndef NK_DOT_SVESDOT_H
|
|
31
31
|
#define NK_DOT_SVESDOT_H
|
|
32
32
|
|
|
33
|
-
#if
|
|
33
|
+
#if NK_TARGET_ARM64_
|
|
34
34
|
#if NK_TARGET_SVESDOT
|
|
35
35
|
|
|
36
36
|
#include "numkong/types.h"
|
|
@@ -85,5 +85,5 @@ NK_PUBLIC void nk_dot_u8_svesdot(nk_u8_t const *a_scalars, nk_u8_t const *b_scal
|
|
|
85
85
|
#endif
|
|
86
86
|
|
|
87
87
|
#endif // NK_TARGET_SVESDOT
|
|
88
|
-
#endif //
|
|
88
|
+
#endif // NK_TARGET_ARM64_
|
|
89
89
|
#endif // NK_DOT_SVESDOT_H
|
|
@@ -12,7 +12,7 @@
|
|
|
12
12
|
#ifndef NK_DOTS_ALDER_H
|
|
13
13
|
#define NK_DOTS_ALDER_H
|
|
14
14
|
|
|
15
|
-
#if
|
|
15
|
+
#if NK_TARGET_X8664_
|
|
16
16
|
#if NK_TARGET_ALDER
|
|
17
17
|
|
|
18
18
|
#include "numkong/dot/alder.h" // Alder-specific dot product helpers
|
|
@@ -113,5 +113,5 @@ nk_define_cross_packed_(dots, e2m3, alder, e2m3, e2m3, f32, nk_b256_vec_t, nk_do
|
|
|
113
113
|
#endif
|
|
114
114
|
|
|
115
115
|
#endif // NK_TARGET_ALDER
|
|
116
|
-
#endif //
|
|
116
|
+
#endif // NK_TARGET_X8664_
|
|
117
117
|
#endif // NK_DOTS_ALDER_H
|
|
@@ -12,7 +12,7 @@
|
|
|
12
12
|
#ifndef NK_DOTS_DIAMOND_H
|
|
13
13
|
#define NK_DOTS_DIAMOND_H
|
|
14
14
|
|
|
15
|
-
#if
|
|
15
|
+
#if NK_TARGET_X8664_
|
|
16
16
|
#if NK_TARGET_DIAMOND
|
|
17
17
|
|
|
18
18
|
#include "numkong/dot/diamond.h"
|
|
@@ -82,5 +82,5 @@ nk_define_cross_packed_(dots, e5m2, diamond, e5m2, e5m2, f32, nk_b512_vec_t, nk_
|
|
|
82
82
|
#endif
|
|
83
83
|
|
|
84
84
|
#endif // NK_TARGET_DIAMOND
|
|
85
|
-
#endif //
|
|
85
|
+
#endif // NK_TARGET_X8664_
|
|
86
86
|
#endif // NK_DOTS_DIAMOND_H
|
|
@@ -9,7 +9,7 @@
|
|
|
9
9
|
#ifndef NK_DOTS_GENOA_H
|
|
10
10
|
#define NK_DOTS_GENOA_H
|
|
11
11
|
|
|
12
|
-
#if
|
|
12
|
+
#if NK_TARGET_X8664_
|
|
13
13
|
#if NK_TARGET_GENOA
|
|
14
14
|
|
|
15
15
|
#include "numkong/dot/genoa.h"
|
|
@@ -96,5 +96,5 @@ nk_define_cross_packed_(dots, e5m2, genoa, e5m2, bf16, f32, nk_b512_vec_t, nk_do
|
|
|
96
96
|
#endif
|
|
97
97
|
|
|
98
98
|
#endif // NK_TARGET_GENOA
|
|
99
|
-
#endif //
|
|
99
|
+
#endif // NK_TARGET_X8664_
|
|
100
100
|
#endif // NK_DOTS_GENOA_H
|
|
@@ -23,7 +23,7 @@
|
|
|
23
23
|
#ifndef NK_DOTS_HASWELL_H
|
|
24
24
|
#define NK_DOTS_HASWELL_H
|
|
25
25
|
|
|
26
|
-
#if
|
|
26
|
+
#if NK_TARGET_X8664_
|
|
27
27
|
#if NK_TARGET_HASWELL
|
|
28
28
|
|
|
29
29
|
#include "numkong/dot/haswell.h"
|
|
@@ -306,5 +306,5 @@ nk_define_cross_packed_(dots, u1, haswell, u1x8, u1x8, u32, nk_b128_vec_t, nk_do
|
|
|
306
306
|
#endif
|
|
307
307
|
|
|
308
308
|
#endif // NK_TARGET_HASWELL
|
|
309
|
-
#endif //
|
|
309
|
+
#endif // NK_TARGET_X8664_
|
|
310
310
|
#endif // NK_DOTS_HASWELL_H
|
|
@@ -21,7 +21,7 @@
|
|
|
21
21
|
#ifndef NK_DOTS_ICELAKE_H
|
|
22
22
|
#define NK_DOTS_ICELAKE_H
|
|
23
23
|
|
|
24
|
-
#if
|
|
24
|
+
#if NK_TARGET_X8664_
|
|
25
25
|
#if NK_TARGET_ICELAKE
|
|
26
26
|
|
|
27
27
|
#include "numkong/dot/icelake.h"
|
|
@@ -176,5 +176,5 @@ nk_define_cross_packed_(dots, u1, icelake, u1x8, u1x8, u32, nk_b512_vec_t, nk_do
|
|
|
176
176
|
#endif
|
|
177
177
|
|
|
178
178
|
#endif // NK_TARGET_ICELAKE
|
|
179
|
-
#endif //
|
|
179
|
+
#endif // NK_TARGET_X8664_
|
|
180
180
|
#endif // NK_DOTS_ICELAKE_H
|
|
@@ -13,7 +13,7 @@
|
|
|
13
13
|
#ifndef NK_DOTS_LOONGSONASX_H
|
|
14
14
|
#define NK_DOTS_LOONGSONASX_H
|
|
15
15
|
|
|
16
|
-
#if
|
|
16
|
+
#if NK_TARGET_LOONGARCH64_
|
|
17
17
|
#if NK_TARGET_LOONGSONASX
|
|
18
18
|
|
|
19
19
|
#include "numkong/dot/loongsonasx.h"
|
|
@@ -172,5 +172,5 @@ nk_define_cross_packed_(dots, f16, loongsonasx, f16, f32, f32, nk_b256_vec_t, nk
|
|
|
172
172
|
#endif
|
|
173
173
|
|
|
174
174
|
#endif // NK_TARGET_LOONGSONASX
|
|
175
|
-
#endif //
|
|
175
|
+
#endif // NK_TARGET_LOONGARCH64_
|
|
176
176
|
#endif // NK_DOTS_LOONGSONASX_H
|
|
@@ -9,7 +9,7 @@
|
|
|
9
9
|
#ifndef NK_DOTS_NEON_H
|
|
10
10
|
#define NK_DOTS_NEON_H
|
|
11
11
|
|
|
12
|
-
#if
|
|
12
|
+
#if NK_TARGET_ARM64_
|
|
13
13
|
#if NK_TARGET_NEON
|
|
14
14
|
|
|
15
15
|
#include "numkong/dot/neon.h"
|
|
@@ -119,5 +119,5 @@ nk_define_cross_packed_(dots, f64, neon, f64, f64, f64, nk_b128_vec_t, nk_dot_f6
|
|
|
119
119
|
#endif
|
|
120
120
|
|
|
121
121
|
#endif // NK_TARGET_NEON
|
|
122
|
-
#endif //
|
|
122
|
+
#endif // NK_TARGET_ARM64_
|
|
123
123
|
#endif // NK_DOTS_NEON_H
|
|
@@ -9,7 +9,7 @@
|
|
|
9
9
|
#ifndef NK_DOTS_NEONBFDOT_H
|
|
10
10
|
#define NK_DOTS_NEONBFDOT_H
|
|
11
11
|
|
|
12
|
-
#if
|
|
12
|
+
#if NK_TARGET_ARM64_
|
|
13
13
|
#if NK_TARGET_NEONBFDOT
|
|
14
14
|
|
|
15
15
|
#include "numkong/dot/neonbfdot.h"
|
|
@@ -55,5 +55,5 @@ nk_define_cross_packed_(dots, bf16, neonbfdot, bf16, bf16, f32, nk_b128_vec_t, n
|
|
|
55
55
|
#endif
|
|
56
56
|
|
|
57
57
|
#endif // NK_TARGET_NEONBFDOT
|
|
58
|
-
#endif //
|
|
58
|
+
#endif // NK_TARGET_ARM64_
|
|
59
59
|
#endif // NK_DOTS_NEONBFDOT_H
|
|
@@ -12,7 +12,7 @@
|
|
|
12
12
|
#ifndef NK_DOTS_NEONFHM_H
|
|
13
13
|
#define NK_DOTS_NEONFHM_H
|
|
14
14
|
|
|
15
|
-
#if
|
|
15
|
+
#if NK_TARGET_ARM64_
|
|
16
16
|
#if NK_TARGET_NEONFHM
|
|
17
17
|
|
|
18
18
|
#include "numkong/dot/neonfhm.h"
|
|
@@ -92,5 +92,5 @@ nk_define_cross_packed_(dots, e5m2, neonfhm, e5m2, e5m2, f32, nk_b128_vec_t, nk_
|
|
|
92
92
|
#endif
|
|
93
93
|
|
|
94
94
|
#endif // NK_TARGET_NEONFHM
|
|
95
|
-
#endif //
|
|
95
|
+
#endif // NK_TARGET_ARM64_
|
|
96
96
|
#endif // NK_DOTS_NEONFHM_H
|
|
@@ -12,7 +12,7 @@
|
|
|
12
12
|
#ifndef NK_DOTS_NEONFP8_H
|
|
13
13
|
#define NK_DOTS_NEONFP8_H
|
|
14
14
|
|
|
15
|
-
#if
|
|
15
|
+
#if NK_TARGET_ARM64_
|
|
16
16
|
#if NK_TARGET_NEONFP8
|
|
17
17
|
|
|
18
18
|
#include "numkong/dot/neonfp8.h"
|
|
@@ -95,5 +95,5 @@ nk_define_cross_packed_(dots, e3m2, neonfp8, e3m2, e3m2, f32, nk_b128_vec_t, nk_
|
|
|
95
95
|
#endif
|
|
96
96
|
|
|
97
97
|
#endif // NK_TARGET_NEONFP8
|
|
98
|
-
#endif //
|
|
98
|
+
#endif // NK_TARGET_ARM64_
|
|
99
99
|
#endif // NK_DOTS_NEONFP8_H
|
|
@@ -9,7 +9,7 @@
|
|
|
9
9
|
#ifndef NK_DOTS_NEONSDOT_H
|
|
10
10
|
#define NK_DOTS_NEONSDOT_H
|
|
11
11
|
|
|
12
|
-
#if
|
|
12
|
+
#if NK_TARGET_ARM64_
|
|
13
13
|
#if NK_TARGET_NEONSDOT
|
|
14
14
|
|
|
15
15
|
#include "numkong/dot/neonsdot.h"
|
|
@@ -140,5 +140,5 @@ nk_define_cross_packed_(dots, e3m2, neonsdot, e3m2, e3m2, f32, nk_b128_vec_t, nk
|
|
|
140
140
|
#endif
|
|
141
141
|
|
|
142
142
|
#endif // NK_TARGET_NEONSDOT
|
|
143
|
-
#endif //
|
|
143
|
+
#endif // NK_TARGET_ARM64_
|
|
144
144
|
#endif // NK_DOTS_NEONSDOT_H
|
|
@@ -24,7 +24,7 @@
|
|
|
24
24
|
#ifndef NK_DOTS_POWERVSX_H
|
|
25
25
|
#define NK_DOTS_POWERVSX_H
|
|
26
26
|
|
|
27
|
-
#if
|
|
27
|
+
#if NK_TARGET_POWER64_
|
|
28
28
|
#if NK_TARGET_POWERVSX
|
|
29
29
|
|
|
30
30
|
#include "numkong/dot/powervsx.h"
|
|
@@ -190,5 +190,5 @@ nk_define_cross_packed_(dots, f64, powervsx, f64, f64, f64, nk_b128_vec_t, nk_do
|
|
|
190
190
|
#endif
|
|
191
191
|
|
|
192
192
|
#endif // NK_TARGET_POWERVSX
|
|
193
|
-
#endif //
|
|
193
|
+
#endif // NK_TARGET_POWER64_
|
|
194
194
|
#endif // NK_DOTS_POWERVSX_H
|
|
@@ -43,7 +43,7 @@
|
|
|
43
43
|
#ifndef NK_DOTS_RVV_H
|
|
44
44
|
#define NK_DOTS_RVV_H
|
|
45
45
|
|
|
46
|
-
#if
|
|
46
|
+
#if NK_TARGET_RISCV64_
|
|
47
47
|
#if NK_TARGET_RVV
|
|
48
48
|
|
|
49
49
|
#include "numkong/types.h"
|
|
@@ -2589,5 +2589,5 @@ NK_PUBLIC void nk_dots_symmetric_e5m2_rvv(nk_e5m2_t const *vectors, nk_size_t ve
|
|
|
2589
2589
|
#endif
|
|
2590
2590
|
|
|
2591
2591
|
#endif // NK_TARGET_RVV
|
|
2592
|
-
#endif //
|
|
2592
|
+
#endif // NK_TARGET_RISCV64_
|
|
2593
2593
|
#endif // NK_DOTS_RVV_H
|
|
@@ -69,7 +69,7 @@
|
|
|
69
69
|
#ifndef NK_DOTS_SAPPHIREAMX_H
|
|
70
70
|
#define NK_DOTS_SAPPHIREAMX_H
|
|
71
71
|
|
|
72
|
-
#if
|
|
72
|
+
#if NK_TARGET_X8664_
|
|
73
73
|
#if NK_TARGET_SAPPHIREAMX
|
|
74
74
|
|
|
75
75
|
#include "numkong/cast/icelake.h" // For FP8 ↔ BF16 conversions
|
|
@@ -4013,5 +4013,5 @@ NK_PUBLIC void nk_dots_symmetric_e3m2_sapphireamx(
|
|
|
4013
4013
|
#endif
|
|
4014
4014
|
|
|
4015
4015
|
#endif // NK_TARGET_SAPPHIREAMX
|
|
4016
|
-
#endif //
|
|
4016
|
+
#endif // NK_TARGET_X8664_
|
|
4017
4017
|
#endif // NK_DOTS_SAPPHIREAMX_H
|
|
@@ -13,7 +13,7 @@
|
|
|
13
13
|
#ifndef NK_DOTS_SIERRA_H
|
|
14
14
|
#define NK_DOTS_SIERRA_H
|
|
15
15
|
|
|
16
|
-
#if
|
|
16
|
+
#if NK_TARGET_X8664_
|
|
17
17
|
#if NK_TARGET_SIERRA
|
|
18
18
|
|
|
19
19
|
#include "numkong/dot/sierra.h" // Sierra-specific dot product helpers
|
|
@@ -96,5 +96,5 @@ nk_define_cross_packed_(dots, e2m3, sierra, e2m3, e2m3, f32, nk_b256_vec_t, nk_d
|
|
|
96
96
|
#endif
|
|
97
97
|
|
|
98
98
|
#endif // NK_TARGET_SIERRA
|
|
99
|
-
#endif //
|
|
99
|
+
#endif // NK_TARGET_X8664_
|
|
100
100
|
#endif // NK_DOTS_SIERRA_H
|
|
@@ -21,7 +21,7 @@
|
|
|
21
21
|
#ifndef NK_DOTS_SKYLAKE_H
|
|
22
22
|
#define NK_DOTS_SKYLAKE_H
|
|
23
23
|
|
|
24
|
-
#if
|
|
24
|
+
#if NK_TARGET_X8664_
|
|
25
25
|
#if NK_TARGET_SKYLAKE
|
|
26
26
|
|
|
27
27
|
#include "numkong/dot/skylake.h"
|
|
@@ -203,5 +203,5 @@ nk_define_cross_packed_(dots, e3m2, skylake, e3m2, e3m2, f32, nk_b512_vec_t, nk_
|
|
|
203
203
|
#endif
|
|
204
204
|
|
|
205
205
|
#endif // NK_TARGET_SKYLAKE
|
|
206
|
-
#endif //
|
|
206
|
+
#endif // NK_TARGET_X8664_
|
|
207
207
|
#endif // NK_DOTS_SKYLAKE_H
|
|
@@ -58,7 +58,7 @@
|
|
|
58
58
|
#ifndef NK_DOTS_SME_H
|
|
59
59
|
#define NK_DOTS_SME_H
|
|
60
60
|
|
|
61
|
-
#if
|
|
61
|
+
#if NK_TARGET_ARM64_
|
|
62
62
|
#if NK_TARGET_SME
|
|
63
63
|
|
|
64
64
|
#include "numkong/types.h"
|
|
@@ -1520,8 +1520,8 @@ NK_PUBLIC svfloat16_t nk_e5m2x_to_f16x_ssve_(svbool_t predicate_b16x, svuint8_t
|
|
|
1520
1520
|
* Converts `e4m3` → `f16` on-the-fly for A, B is pre-converted during packing.
|
|
1521
1521
|
*/
|
|
1522
1522
|
__arm_locally_streaming __arm_new("za") static void nk_dots_packed_e4m3_sme_streaming_( //
|
|
1523
|
-
nk_e4m3_t const *a, void const *b_packed, nk_f32_t *c,
|
|
1524
|
-
nk_size_t rows, nk_size_t columns, nk_size_t depth,
|
|
1523
|
+
nk_e4m3_t const *a, void const *b_packed, nk_f32_t *c, //
|
|
1524
|
+
nk_size_t rows, nk_size_t columns, nk_size_t depth, //
|
|
1525
1525
|
nk_size_t a_stride_elements, nk_size_t c_stride_elements) {
|
|
1526
1526
|
|
|
1527
1527
|
nk_dots_sme_packed_header_t const *header = (nk_dots_sme_packed_header_t const *)b_packed;
|
|
@@ -2032,8 +2032,8 @@ NK_PUBLIC void nk_dots_symmetric_e4m3_sme(nk_e4m3_t const *vectors, nk_size_t ve
|
|
|
2032
2032
|
* Converts `e5m2` → `f16` on-the-fly for A, B is pre-converted during packing.
|
|
2033
2033
|
*/
|
|
2034
2034
|
__arm_locally_streaming __arm_new("za") static void nk_dots_packed_e5m2_sme_streaming_( //
|
|
2035
|
-
nk_e5m2_t const *a, void const *b_packed, nk_f32_t *c,
|
|
2036
|
-
nk_size_t rows, nk_size_t columns, nk_size_t depth,
|
|
2035
|
+
nk_e5m2_t const *a, void const *b_packed, nk_f32_t *c, //
|
|
2036
|
+
nk_size_t rows, nk_size_t columns, nk_size_t depth, //
|
|
2037
2037
|
nk_size_t a_stride_elements, nk_size_t c_stride_elements) {
|
|
2038
2038
|
|
|
2039
2039
|
nk_dots_sme_packed_header_t const *header = (nk_dots_sme_packed_header_t const *)b_packed;
|
|
@@ -2491,8 +2491,8 @@ NK_PUBLIC svint8_t nk_e2m3x_to_i8x_ssve_(svbool_t predicate_b8x, svuint8_t raw_b
|
|
|
2491
2491
|
* Accumulates in `i32` via `svmopa_za32_s8_m`, then converts to `f32` with 1/256 scaling.
|
|
2492
2492
|
*/
|
|
2493
2493
|
__arm_locally_streaming __arm_new("za") static void nk_dots_packed_e2m3_sme_streaming_( //
|
|
2494
|
-
nk_e2m3_t const *a, void const *b_packed, nk_f32_t *c,
|
|
2495
|
-
nk_size_t rows, nk_size_t columns, nk_size_t depth,
|
|
2494
|
+
nk_e2m3_t const *a, void const *b_packed, nk_f32_t *c, //
|
|
2495
|
+
nk_size_t rows, nk_size_t columns, nk_size_t depth, //
|
|
2496
2496
|
nk_size_t a_stride_elements, nk_size_t c_stride_elements) {
|
|
2497
2497
|
|
|
2498
2498
|
nk_dots_sme_packed_header_t const *header = (nk_dots_sme_packed_header_t const *)b_packed;
|
|
@@ -3013,8 +3013,8 @@ NK_PUBLIC svfloat16_t nk_e3m2x_to_f16x_ssve_(svbool_t predicate_b16x, svuint8_t
|
|
|
3013
3013
|
* Converts `e3m2` → `f16` on-the-fly for A, B is pre-converted during packing.
|
|
3014
3014
|
*/
|
|
3015
3015
|
__arm_locally_streaming __arm_new("za") static void nk_dots_packed_e3m2_sme_streaming_( //
|
|
3016
|
-
nk_e3m2_t const *a, void const *b_packed, nk_f32_t *c,
|
|
3017
|
-
nk_size_t rows, nk_size_t columns, nk_size_t depth,
|
|
3016
|
+
nk_e3m2_t const *a, void const *b_packed, nk_f32_t *c, //
|
|
3017
|
+
nk_size_t rows, nk_size_t columns, nk_size_t depth, //
|
|
3018
3018
|
nk_size_t a_stride_elements, nk_size_t c_stride_elements) {
|
|
3019
3019
|
|
|
3020
3020
|
nk_dots_sme_packed_header_t const *header = (nk_dots_sme_packed_header_t const *)b_packed;
|
|
@@ -5005,5 +5005,5 @@ NK_PUBLIC void nk_dots_symmetric_i4_sme(nk_i4x2_t const *vectors, nk_size_t vect
|
|
|
5005
5005
|
#endif
|
|
5006
5006
|
|
|
5007
5007
|
#endif // NK_TARGET_SME
|
|
5008
|
-
#endif //
|
|
5008
|
+
#endif // NK_TARGET_ARM64_
|
|
5009
5009
|
#endif // NK_DOTS_SME_H
|
|
@@ -13,7 +13,7 @@
|
|
|
13
13
|
#ifndef NK_DOTS_SMEBI32_H
|
|
14
14
|
#define NK_DOTS_SMEBI32_H
|
|
15
15
|
|
|
16
|
-
#if
|
|
16
|
+
#if NK_TARGET_ARM64_
|
|
17
17
|
#if NK_TARGET_SMEBI32
|
|
18
18
|
|
|
19
19
|
#include "numkong/types.h"
|
|
@@ -470,5 +470,5 @@ NK_PUBLIC void nk_dots_symmetric_u1_smebi32(nk_u1x8_t const *vectors, nk_size_t
|
|
|
470
470
|
#endif
|
|
471
471
|
|
|
472
472
|
#endif // NK_TARGET_SMEBI32
|
|
473
|
-
#endif //
|
|
473
|
+
#endif // NK_TARGET_ARM64_
|
|
474
474
|
#endif // NK_DOTS_SMEBI32_H
|
|
@@ -34,7 +34,7 @@
|
|
|
34
34
|
#ifndef NK_DOTS_SMEF64_H
|
|
35
35
|
#define NK_DOTS_SMEF64_H
|
|
36
36
|
|
|
37
|
-
#if
|
|
37
|
+
#if NK_TARGET_ARM64_
|
|
38
38
|
#if NK_TARGET_SME
|
|
39
39
|
|
|
40
40
|
#include "numkong/types.h"
|
|
@@ -1319,5 +1319,5 @@ NK_PUBLIC void nk_dots_packed_f64_smef64(nk_f64_t const *a, void const *b_packed
|
|
|
1319
1319
|
#endif
|
|
1320
1320
|
|
|
1321
1321
|
#endif // NK_TARGET_SME
|
|
1322
|
-
#endif //
|
|
1322
|
+
#endif // NK_TARGET_ARM64_
|
|
1323
1323
|
#endif // NK_DOTS_SMEF64_H
|
|
@@ -26,7 +26,7 @@
|
|
|
26
26
|
#ifndef NK_DOTS_SMEHALF_H
|
|
27
27
|
#define NK_DOTS_SMEHALF_H
|
|
28
28
|
|
|
29
|
-
#if
|
|
29
|
+
#if NK_TARGET_ARM64_
|
|
30
30
|
#if NK_TARGET_SMEHALF
|
|
31
31
|
|
|
32
32
|
#if defined(__cplusplus)
|
|
@@ -42,6 +42,6 @@ extern "C" {
|
|
|
42
42
|
#endif
|
|
43
43
|
|
|
44
44
|
#endif // NK_TARGET_SMEHALF
|
|
45
|
-
#endif //
|
|
45
|
+
#endif // NK_TARGET_ARM64_
|
|
46
46
|
|
|
47
47
|
#endif // NK_DOTS_SMEHALF_H
|
|
@@ -22,7 +22,7 @@
|
|
|
22
22
|
#ifndef NK_EACH_HASWELL_H
|
|
23
23
|
#define NK_EACH_HASWELL_H
|
|
24
24
|
|
|
25
|
-
#if
|
|
25
|
+
#if NK_TARGET_X8664_
|
|
26
26
|
#if NK_TARGET_HASWELL
|
|
27
27
|
|
|
28
28
|
#include "numkong/types.h"
|
|
@@ -1654,5 +1654,5 @@ NK_PUBLIC void nk_each_fma_f64c_haswell(nk_f64c_t const *a, nk_f64c_t const *b,
|
|
|
1654
1654
|
#endif
|
|
1655
1655
|
|
|
1656
1656
|
#endif // NK_TARGET_HASWELL
|
|
1657
|
-
#endif //
|
|
1657
|
+
#endif // NK_TARGET_X8664_
|
|
1658
1658
|
#endif // NK_EACH_HASWELL_H
|
|
@@ -23,7 +23,7 @@
|
|
|
23
23
|
#ifndef NK_EACH_ICELAKE_H
|
|
24
24
|
#define NK_EACH_ICELAKE_H
|
|
25
25
|
|
|
26
|
-
#if
|
|
26
|
+
#if NK_TARGET_X8664_
|
|
27
27
|
#if NK_TARGET_ICELAKE
|
|
28
28
|
|
|
29
29
|
#include "numkong/types.h"
|
|
@@ -268,5 +268,5 @@ nk_each_sum_u64_icelake_cycle:
|
|
|
268
268
|
#endif
|
|
269
269
|
|
|
270
270
|
#endif // NK_TARGET_ICELAKE
|
|
271
|
-
#endif //
|
|
271
|
+
#endif // NK_TARGET_X8664_
|
|
272
272
|
#endif // NK_EACH_ICELAKE_H
|
|
@@ -31,7 +31,7 @@
|
|
|
31
31
|
#ifndef NK_EACH_NEON_H
|
|
32
32
|
#define NK_EACH_NEON_H
|
|
33
33
|
|
|
34
|
-
#if
|
|
34
|
+
#if NK_TARGET_ARM64_
|
|
35
35
|
#if NK_TARGET_NEON
|
|
36
36
|
|
|
37
37
|
#include "numkong/types.h"
|
|
@@ -1134,5 +1134,5 @@ NK_PUBLIC void nk_each_sum_i8_neon(nk_i8_t const *a, nk_i8_t const *b, nk_size_t
|
|
|
1134
1134
|
#endif
|
|
1135
1135
|
|
|
1136
1136
|
#endif // NK_TARGET_NEON
|
|
1137
|
-
#endif //
|
|
1137
|
+
#endif // NK_TARGET_ARM64_
|
|
1138
1138
|
#endif // NK_EACH_NEON_H
|
|
@@ -31,7 +31,7 @@
|
|
|
31
31
|
#ifndef NK_EACH_NEONBFDOT_H
|
|
32
32
|
#define NK_EACH_NEONBFDOT_H
|
|
33
33
|
|
|
34
|
-
#if
|
|
34
|
+
#if NK_TARGET_ARM64_
|
|
35
35
|
#if NK_TARGET_NEONBFDOT
|
|
36
36
|
|
|
37
37
|
#include "numkong/types.h"
|
|
@@ -207,5 +207,5 @@ NK_PUBLIC void nk_each_fma_bf16_neonbfdot( //
|
|
|
207
207
|
#endif
|
|
208
208
|
|
|
209
209
|
#endif // NK_TARGET_NEONBFDOT
|
|
210
|
-
#endif //
|
|
210
|
+
#endif // NK_TARGET_ARM64_
|
|
211
211
|
#endif // NK_EACH_NEONBFDOT_H
|