numkong 7.4.2 → 7.4.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +84 -84
- package/c/numkong.c +1 -1
- package/include/numkong/attention/sapphireamx.h +2 -2
- package/include/numkong/attention/sme.h +2 -2
- package/include/numkong/capabilities.h +47 -47
- package/include/numkong/cast/diamond.h +2 -2
- package/include/numkong/cast/haswell.h +2 -2
- package/include/numkong/cast/icelake.h +2 -2
- package/include/numkong/cast/loongsonasx.h +2 -2
- package/include/numkong/cast/neon.h +2 -2
- package/include/numkong/cast/powervsx.h +2 -2
- package/include/numkong/cast/rvv.h +2 -2
- package/include/numkong/cast/sapphire.h +2 -2
- package/include/numkong/cast/skylake.h +2 -2
- package/include/numkong/curved/genoa.h +2 -2
- package/include/numkong/curved/haswell.h +2 -2
- package/include/numkong/curved/neon.h +2 -2
- package/include/numkong/curved/neonbfdot.h +2 -2
- package/include/numkong/curved/rvv.h +2 -2
- package/include/numkong/curved/skylake.h +2 -2
- package/include/numkong/curved/smef64.h +2 -2
- package/include/numkong/dot/alder.h +2 -2
- package/include/numkong/dot/diamond.h +2 -2
- package/include/numkong/dot/genoa.h +2 -2
- package/include/numkong/dot/haswell.h +2 -2
- package/include/numkong/dot/icelake.h +2 -2
- package/include/numkong/dot/loongsonasx.h +2 -2
- package/include/numkong/dot/neon.h +2 -2
- package/include/numkong/dot/neonbfdot.h +2 -2
- package/include/numkong/dot/neonfhm.h +2 -2
- package/include/numkong/dot/neonfp8.h +2 -2
- package/include/numkong/dot/neonsdot.h +2 -2
- package/include/numkong/dot/rvv.h +2 -2
- package/include/numkong/dot/rvvbb.h +2 -2
- package/include/numkong/dot/rvvbf16.h +2 -2
- package/include/numkong/dot/rvvhalf.h +2 -2
- package/include/numkong/dot/sapphire.h +2 -2
- package/include/numkong/dot/sierra.h +2 -2
- package/include/numkong/dot/skylake.h +2 -2
- package/include/numkong/dot/sve.h +2 -2
- package/include/numkong/dot/svebfdot.h +2 -2
- package/include/numkong/dot/svehalf.h +2 -2
- package/include/numkong/dot/svesdot.h +2 -2
- package/include/numkong/dots/alder.h +2 -2
- package/include/numkong/dots/diamond.h +2 -2
- package/include/numkong/dots/genoa.h +2 -2
- package/include/numkong/dots/haswell.h +2 -2
- package/include/numkong/dots/icelake.h +2 -2
- package/include/numkong/dots/loongsonasx.h +2 -2
- package/include/numkong/dots/neon.h +2 -2
- package/include/numkong/dots/neonbfdot.h +2 -2
- package/include/numkong/dots/neonfhm.h +2 -2
- package/include/numkong/dots/neonfp8.h +2 -2
- package/include/numkong/dots/neonsdot.h +2 -2
- package/include/numkong/dots/powervsx.h +2 -2
- package/include/numkong/dots/rvv.h +2 -2
- package/include/numkong/dots/sapphireamx.h +2 -2
- package/include/numkong/dots/sierra.h +2 -2
- package/include/numkong/dots/skylake.h +2 -2
- package/include/numkong/dots/sme.h +10 -10
- package/include/numkong/dots/smebi32.h +2 -2
- package/include/numkong/dots/smef64.h +2 -2
- package/include/numkong/dots/smehalf.h +2 -2
- package/include/numkong/each/haswell.h +2 -2
- package/include/numkong/each/icelake.h +2 -2
- package/include/numkong/each/neon.h +2 -2
- package/include/numkong/each/neonbfdot.h +2 -2
- package/include/numkong/each/neonhalf.h +2 -2
- package/include/numkong/each/rvv.h +2 -2
- package/include/numkong/each/sapphire.h +2 -2
- package/include/numkong/each/skylake.h +2 -2
- package/include/numkong/geospatial/haswell.h +2 -2
- package/include/numkong/geospatial/neon.h +2 -2
- package/include/numkong/geospatial/rvv.h +2 -2
- package/include/numkong/geospatial/skylake.h +2 -2
- package/include/numkong/maxsim/alder.h +2 -2
- package/include/numkong/maxsim/genoa.h +2 -2
- package/include/numkong/maxsim/haswell.h +2 -2
- package/include/numkong/maxsim/icelake.h +2 -2
- package/include/numkong/maxsim/neonsdot.h +2 -2
- package/include/numkong/maxsim/sapphireamx.h +2 -2
- package/include/numkong/maxsim/sme.h +2 -2
- package/include/numkong/mesh/haswell.h +2 -2
- package/include/numkong/mesh/neon.h +2 -2
- package/include/numkong/mesh/neonbfdot.h +2 -2
- package/include/numkong/mesh/rvv.h +2 -2
- package/include/numkong/mesh/skylake.h +2 -2
- package/include/numkong/numkong.h +1 -1
- package/include/numkong/probability/haswell.h +2 -2
- package/include/numkong/probability/neon.h +2 -2
- package/include/numkong/probability/rvv.h +2 -2
- package/include/numkong/probability/skylake.h +2 -2
- package/include/numkong/reduce/alder.h +2 -2
- package/include/numkong/reduce/genoa.h +2 -2
- package/include/numkong/reduce/haswell.h +2 -2
- package/include/numkong/reduce/icelake.h +2 -2
- package/include/numkong/reduce/neon.h +2 -2
- package/include/numkong/reduce/neonbfdot.h +2 -2
- package/include/numkong/reduce/neonfhm.h +2 -2
- package/include/numkong/reduce/neonsdot.h +2 -2
- package/include/numkong/reduce/rvv.h +2 -2
- package/include/numkong/reduce/sierra.h +2 -2
- package/include/numkong/reduce/skylake.h +2 -2
- package/include/numkong/scalar/haswell.h +2 -2
- package/include/numkong/scalar/loongsonasx.h +2 -2
- package/include/numkong/scalar/neon.h +2 -2
- package/include/numkong/scalar/neonhalf.h +2 -2
- package/include/numkong/scalar/powervsx.h +2 -2
- package/include/numkong/scalar/rvv.h +2 -2
- package/include/numkong/scalar/sapphire.h +2 -2
- package/include/numkong/set/haswell.h +2 -2
- package/include/numkong/set/icelake.h +2 -2
- package/include/numkong/set/loongsonasx.h +2 -2
- package/include/numkong/set/neon.h +2 -2
- package/include/numkong/set/powervsx.h +2 -2
- package/include/numkong/set/rvv.h +2 -2
- package/include/numkong/set/rvvbb.h +2 -2
- package/include/numkong/set/sve.h +2 -2
- package/include/numkong/sets/haswell.h +2 -2
- package/include/numkong/sets/icelake.h +2 -2
- package/include/numkong/sets/loongsonasx.h +2 -2
- package/include/numkong/sets/neon.h +2 -2
- package/include/numkong/sets/powervsx.h +2 -2
- package/include/numkong/sets/smebi32.h +2 -2
- package/include/numkong/sparse/icelake.h +2 -2
- package/include/numkong/sparse/neon.h +2 -2
- package/include/numkong/sparse/sve2.h +2 -2
- package/include/numkong/sparse/turin.h +2 -2
- package/include/numkong/spatial/alder.h +2 -2
- package/include/numkong/spatial/diamond.h +2 -2
- package/include/numkong/spatial/genoa.h +2 -2
- package/include/numkong/spatial/haswell.h +2 -2
- package/include/numkong/spatial/icelake.h +2 -2
- package/include/numkong/spatial/loongsonasx.h +2 -2
- package/include/numkong/spatial/neon.h +2 -2
- package/include/numkong/spatial/neonbfdot.h +2 -2
- package/include/numkong/spatial/neonfp8.h +2 -2
- package/include/numkong/spatial/neonsdot.h +2 -2
- package/include/numkong/spatial/powervsx.h +2 -2
- package/include/numkong/spatial/rvv.h +2 -2
- package/include/numkong/spatial/rvvbf16.h +2 -2
- package/include/numkong/spatial/rvvhalf.h +2 -2
- package/include/numkong/spatial/sierra.h +2 -2
- package/include/numkong/spatial/skylake.h +2 -2
- package/include/numkong/spatial/sve.h +2 -2
- package/include/numkong/spatial/svebfdot.h +2 -2
- package/include/numkong/spatial/svehalf.h +2 -2
- package/include/numkong/spatial/svesdot.h +2 -2
- package/include/numkong/spatials/alder.h +2 -2
- package/include/numkong/spatials/diamond.h +2 -2
- package/include/numkong/spatials/genoa.h +2 -2
- package/include/numkong/spatials/haswell.h +2 -2
- package/include/numkong/spatials/icelake.h +2 -2
- package/include/numkong/spatials/loongsonasx.h +2 -2
- package/include/numkong/spatials/neon.h +2 -2
- package/include/numkong/spatials/neonbfdot.h +2 -2
- package/include/numkong/spatials/neonfhm.h +2 -2
- package/include/numkong/spatials/neonfp8.h +2 -2
- package/include/numkong/spatials/neonsdot.h +2 -2
- package/include/numkong/spatials/powervsx.h +2 -2
- package/include/numkong/spatials/rvv.h +2 -2
- package/include/numkong/spatials/sapphireamx.h +2 -2
- package/include/numkong/spatials/sierra.h +2 -2
- package/include/numkong/spatials/skylake.h +2 -2
- package/include/numkong/spatials/sme.h +2 -2
- package/include/numkong/spatials/smef64.h +2 -2
- package/include/numkong/trigonometry/haswell.h +2 -2
- package/include/numkong/trigonometry/neon.h +2 -2
- package/include/numkong/trigonometry/rvv.h +2 -2
- package/include/numkong/trigonometry/skylake.h +2 -2
- package/include/numkong/types.h +88 -80
- package/package.json +7 -7
|
@@ -9,7 +9,7 @@
|
|
|
9
9
|
#ifndef NK_DOTS_NEONBFDOT_H
|
|
10
10
|
#define NK_DOTS_NEONBFDOT_H
|
|
11
11
|
|
|
12
|
-
#if
|
|
12
|
+
#if NK_TARGET_ARM64_
|
|
13
13
|
#if NK_TARGET_NEONBFDOT
|
|
14
14
|
|
|
15
15
|
#include "numkong/dot/neonbfdot.h"
|
|
@@ -55,5 +55,5 @@ nk_define_cross_packed_(dots, bf16, neonbfdot, bf16, bf16, f32, nk_b128_vec_t, n
|
|
|
55
55
|
#endif
|
|
56
56
|
|
|
57
57
|
#endif // NK_TARGET_NEONBFDOT
|
|
58
|
-
#endif //
|
|
58
|
+
#endif // NK_TARGET_ARM64_
|
|
59
59
|
#endif // NK_DOTS_NEONBFDOT_H
|
|
@@ -12,7 +12,7 @@
|
|
|
12
12
|
#ifndef NK_DOTS_NEONFHM_H
|
|
13
13
|
#define NK_DOTS_NEONFHM_H
|
|
14
14
|
|
|
15
|
-
#if
|
|
15
|
+
#if NK_TARGET_ARM64_
|
|
16
16
|
#if NK_TARGET_NEONFHM
|
|
17
17
|
|
|
18
18
|
#include "numkong/dot/neonfhm.h"
|
|
@@ -92,5 +92,5 @@ nk_define_cross_packed_(dots, e5m2, neonfhm, e5m2, e5m2, f32, nk_b128_vec_t, nk_
|
|
|
92
92
|
#endif
|
|
93
93
|
|
|
94
94
|
#endif // NK_TARGET_NEONFHM
|
|
95
|
-
#endif //
|
|
95
|
+
#endif // NK_TARGET_ARM64_
|
|
96
96
|
#endif // NK_DOTS_NEONFHM_H
|
|
@@ -12,7 +12,7 @@
|
|
|
12
12
|
#ifndef NK_DOTS_NEONFP8_H
|
|
13
13
|
#define NK_DOTS_NEONFP8_H
|
|
14
14
|
|
|
15
|
-
#if
|
|
15
|
+
#if NK_TARGET_ARM64_
|
|
16
16
|
#if NK_TARGET_NEONFP8
|
|
17
17
|
|
|
18
18
|
#include "numkong/dot/neonfp8.h"
|
|
@@ -95,5 +95,5 @@ nk_define_cross_packed_(dots, e3m2, neonfp8, e3m2, e3m2, f32, nk_b128_vec_t, nk_
|
|
|
95
95
|
#endif
|
|
96
96
|
|
|
97
97
|
#endif // NK_TARGET_NEONFP8
|
|
98
|
-
#endif //
|
|
98
|
+
#endif // NK_TARGET_ARM64_
|
|
99
99
|
#endif // NK_DOTS_NEONFP8_H
|
|
@@ -9,7 +9,7 @@
|
|
|
9
9
|
#ifndef NK_DOTS_NEONSDOT_H
|
|
10
10
|
#define NK_DOTS_NEONSDOT_H
|
|
11
11
|
|
|
12
|
-
#if
|
|
12
|
+
#if NK_TARGET_ARM64_
|
|
13
13
|
#if NK_TARGET_NEONSDOT
|
|
14
14
|
|
|
15
15
|
#include "numkong/dot/neonsdot.h"
|
|
@@ -140,5 +140,5 @@ nk_define_cross_packed_(dots, e3m2, neonsdot, e3m2, e3m2, f32, nk_b128_vec_t, nk
|
|
|
140
140
|
#endif
|
|
141
141
|
|
|
142
142
|
#endif // NK_TARGET_NEONSDOT
|
|
143
|
-
#endif //
|
|
143
|
+
#endif // NK_TARGET_ARM64_
|
|
144
144
|
#endif // NK_DOTS_NEONSDOT_H
|
|
@@ -24,7 +24,7 @@
|
|
|
24
24
|
#ifndef NK_DOTS_POWERVSX_H
|
|
25
25
|
#define NK_DOTS_POWERVSX_H
|
|
26
26
|
|
|
27
|
-
#if
|
|
27
|
+
#if NK_TARGET_POWER64_
|
|
28
28
|
#if NK_TARGET_POWERVSX
|
|
29
29
|
|
|
30
30
|
#include "numkong/dot/powervsx.h"
|
|
@@ -190,5 +190,5 @@ nk_define_cross_packed_(dots, f64, powervsx, f64, f64, f64, nk_b128_vec_t, nk_do
|
|
|
190
190
|
#endif
|
|
191
191
|
|
|
192
192
|
#endif // NK_TARGET_POWERVSX
|
|
193
|
-
#endif //
|
|
193
|
+
#endif // NK_TARGET_POWER64_
|
|
194
194
|
#endif // NK_DOTS_POWERVSX_H
|
|
@@ -43,7 +43,7 @@
|
|
|
43
43
|
#ifndef NK_DOTS_RVV_H
|
|
44
44
|
#define NK_DOTS_RVV_H
|
|
45
45
|
|
|
46
|
-
#if
|
|
46
|
+
#if NK_TARGET_RISCV64_
|
|
47
47
|
#if NK_TARGET_RVV
|
|
48
48
|
|
|
49
49
|
#include "numkong/types.h"
|
|
@@ -2589,5 +2589,5 @@ NK_PUBLIC void nk_dots_symmetric_e5m2_rvv(nk_e5m2_t const *vectors, nk_size_t ve
|
|
|
2589
2589
|
#endif
|
|
2590
2590
|
|
|
2591
2591
|
#endif // NK_TARGET_RVV
|
|
2592
|
-
#endif //
|
|
2592
|
+
#endif // NK_TARGET_RISCV64_
|
|
2593
2593
|
#endif // NK_DOTS_RVV_H
|
|
@@ -69,7 +69,7 @@
|
|
|
69
69
|
#ifndef NK_DOTS_SAPPHIREAMX_H
|
|
70
70
|
#define NK_DOTS_SAPPHIREAMX_H
|
|
71
71
|
|
|
72
|
-
#if
|
|
72
|
+
#if NK_TARGET_X8664_
|
|
73
73
|
#if NK_TARGET_SAPPHIREAMX
|
|
74
74
|
|
|
75
75
|
#include "numkong/cast/icelake.h" // For FP8 ↔ BF16 conversions
|
|
@@ -4013,5 +4013,5 @@ NK_PUBLIC void nk_dots_symmetric_e3m2_sapphireamx(
|
|
|
4013
4013
|
#endif
|
|
4014
4014
|
|
|
4015
4015
|
#endif // NK_TARGET_SAPPHIREAMX
|
|
4016
|
-
#endif //
|
|
4016
|
+
#endif // NK_TARGET_X8664_
|
|
4017
4017
|
#endif // NK_DOTS_SAPPHIREAMX_H
|
|
@@ -13,7 +13,7 @@
|
|
|
13
13
|
#ifndef NK_DOTS_SIERRA_H
|
|
14
14
|
#define NK_DOTS_SIERRA_H
|
|
15
15
|
|
|
16
|
-
#if
|
|
16
|
+
#if NK_TARGET_X8664_
|
|
17
17
|
#if NK_TARGET_SIERRA
|
|
18
18
|
|
|
19
19
|
#include "numkong/dot/sierra.h" // Sierra-specific dot product helpers
|
|
@@ -96,5 +96,5 @@ nk_define_cross_packed_(dots, e2m3, sierra, e2m3, e2m3, f32, nk_b256_vec_t, nk_d
|
|
|
96
96
|
#endif
|
|
97
97
|
|
|
98
98
|
#endif // NK_TARGET_SIERRA
|
|
99
|
-
#endif //
|
|
99
|
+
#endif // NK_TARGET_X8664_
|
|
100
100
|
#endif // NK_DOTS_SIERRA_H
|
|
@@ -21,7 +21,7 @@
|
|
|
21
21
|
#ifndef NK_DOTS_SKYLAKE_H
|
|
22
22
|
#define NK_DOTS_SKYLAKE_H
|
|
23
23
|
|
|
24
|
-
#if
|
|
24
|
+
#if NK_TARGET_X8664_
|
|
25
25
|
#if NK_TARGET_SKYLAKE
|
|
26
26
|
|
|
27
27
|
#include "numkong/dot/skylake.h"
|
|
@@ -203,5 +203,5 @@ nk_define_cross_packed_(dots, e3m2, skylake, e3m2, e3m2, f32, nk_b512_vec_t, nk_
|
|
|
203
203
|
#endif
|
|
204
204
|
|
|
205
205
|
#endif // NK_TARGET_SKYLAKE
|
|
206
|
-
#endif //
|
|
206
|
+
#endif // NK_TARGET_X8664_
|
|
207
207
|
#endif // NK_DOTS_SKYLAKE_H
|
|
@@ -58,7 +58,7 @@
|
|
|
58
58
|
#ifndef NK_DOTS_SME_H
|
|
59
59
|
#define NK_DOTS_SME_H
|
|
60
60
|
|
|
61
|
-
#if
|
|
61
|
+
#if NK_TARGET_ARM64_
|
|
62
62
|
#if NK_TARGET_SME
|
|
63
63
|
|
|
64
64
|
#include "numkong/types.h"
|
|
@@ -1520,8 +1520,8 @@ NK_PUBLIC svfloat16_t nk_e5m2x_to_f16x_ssve_(svbool_t predicate_b16x, svuint8_t
|
|
|
1520
1520
|
* Converts `e4m3` → `f16` on-the-fly for A, B is pre-converted during packing.
|
|
1521
1521
|
*/
|
|
1522
1522
|
__arm_locally_streaming __arm_new("za") static void nk_dots_packed_e4m3_sme_streaming_( //
|
|
1523
|
-
nk_e4m3_t const *a, void const *b_packed, nk_f32_t *c,
|
|
1524
|
-
nk_size_t rows, nk_size_t columns, nk_size_t depth,
|
|
1523
|
+
nk_e4m3_t const *a, void const *b_packed, nk_f32_t *c, //
|
|
1524
|
+
nk_size_t rows, nk_size_t columns, nk_size_t depth, //
|
|
1525
1525
|
nk_size_t a_stride_elements, nk_size_t c_stride_elements) {
|
|
1526
1526
|
|
|
1527
1527
|
nk_dots_sme_packed_header_t const *header = (nk_dots_sme_packed_header_t const *)b_packed;
|
|
@@ -2032,8 +2032,8 @@ NK_PUBLIC void nk_dots_symmetric_e4m3_sme(nk_e4m3_t const *vectors, nk_size_t ve
|
|
|
2032
2032
|
* Converts `e5m2` → `f16` on-the-fly for A, B is pre-converted during packing.
|
|
2033
2033
|
*/
|
|
2034
2034
|
__arm_locally_streaming __arm_new("za") static void nk_dots_packed_e5m2_sme_streaming_( //
|
|
2035
|
-
nk_e5m2_t const *a, void const *b_packed, nk_f32_t *c,
|
|
2036
|
-
nk_size_t rows, nk_size_t columns, nk_size_t depth,
|
|
2035
|
+
nk_e5m2_t const *a, void const *b_packed, nk_f32_t *c, //
|
|
2036
|
+
nk_size_t rows, nk_size_t columns, nk_size_t depth, //
|
|
2037
2037
|
nk_size_t a_stride_elements, nk_size_t c_stride_elements) {
|
|
2038
2038
|
|
|
2039
2039
|
nk_dots_sme_packed_header_t const *header = (nk_dots_sme_packed_header_t const *)b_packed;
|
|
@@ -2491,8 +2491,8 @@ NK_PUBLIC svint8_t nk_e2m3x_to_i8x_ssve_(svbool_t predicate_b8x, svuint8_t raw_b
|
|
|
2491
2491
|
* Accumulates in `i32` via `svmopa_za32_s8_m`, then converts to `f32` with 1/256 scaling.
|
|
2492
2492
|
*/
|
|
2493
2493
|
__arm_locally_streaming __arm_new("za") static void nk_dots_packed_e2m3_sme_streaming_( //
|
|
2494
|
-
nk_e2m3_t const *a, void const *b_packed, nk_f32_t *c,
|
|
2495
|
-
nk_size_t rows, nk_size_t columns, nk_size_t depth,
|
|
2494
|
+
nk_e2m3_t const *a, void const *b_packed, nk_f32_t *c, //
|
|
2495
|
+
nk_size_t rows, nk_size_t columns, nk_size_t depth, //
|
|
2496
2496
|
nk_size_t a_stride_elements, nk_size_t c_stride_elements) {
|
|
2497
2497
|
|
|
2498
2498
|
nk_dots_sme_packed_header_t const *header = (nk_dots_sme_packed_header_t const *)b_packed;
|
|
@@ -3013,8 +3013,8 @@ NK_PUBLIC svfloat16_t nk_e3m2x_to_f16x_ssve_(svbool_t predicate_b16x, svuint8_t
|
|
|
3013
3013
|
* Converts `e3m2` → `f16` on-the-fly for A, B is pre-converted during packing.
|
|
3014
3014
|
*/
|
|
3015
3015
|
__arm_locally_streaming __arm_new("za") static void nk_dots_packed_e3m2_sme_streaming_( //
|
|
3016
|
-
nk_e3m2_t const *a, void const *b_packed, nk_f32_t *c,
|
|
3017
|
-
nk_size_t rows, nk_size_t columns, nk_size_t depth,
|
|
3016
|
+
nk_e3m2_t const *a, void const *b_packed, nk_f32_t *c, //
|
|
3017
|
+
nk_size_t rows, nk_size_t columns, nk_size_t depth, //
|
|
3018
3018
|
nk_size_t a_stride_elements, nk_size_t c_stride_elements) {
|
|
3019
3019
|
|
|
3020
3020
|
nk_dots_sme_packed_header_t const *header = (nk_dots_sme_packed_header_t const *)b_packed;
|
|
@@ -5005,5 +5005,5 @@ NK_PUBLIC void nk_dots_symmetric_i4_sme(nk_i4x2_t const *vectors, nk_size_t vect
|
|
|
5005
5005
|
#endif
|
|
5006
5006
|
|
|
5007
5007
|
#endif // NK_TARGET_SME
|
|
5008
|
-
#endif //
|
|
5008
|
+
#endif // NK_TARGET_ARM64_
|
|
5009
5009
|
#endif // NK_DOTS_SME_H
|
|
@@ -13,7 +13,7 @@
|
|
|
13
13
|
#ifndef NK_DOTS_SMEBI32_H
|
|
14
14
|
#define NK_DOTS_SMEBI32_H
|
|
15
15
|
|
|
16
|
-
#if
|
|
16
|
+
#if NK_TARGET_ARM64_
|
|
17
17
|
#if NK_TARGET_SMEBI32
|
|
18
18
|
|
|
19
19
|
#include "numkong/types.h"
|
|
@@ -470,5 +470,5 @@ NK_PUBLIC void nk_dots_symmetric_u1_smebi32(nk_u1x8_t const *vectors, nk_size_t
|
|
|
470
470
|
#endif
|
|
471
471
|
|
|
472
472
|
#endif // NK_TARGET_SMEBI32
|
|
473
|
-
#endif //
|
|
473
|
+
#endif // NK_TARGET_ARM64_
|
|
474
474
|
#endif // NK_DOTS_SMEBI32_H
|
|
@@ -34,7 +34,7 @@
|
|
|
34
34
|
#ifndef NK_DOTS_SMEF64_H
|
|
35
35
|
#define NK_DOTS_SMEF64_H
|
|
36
36
|
|
|
37
|
-
#if
|
|
37
|
+
#if NK_TARGET_ARM64_
|
|
38
38
|
#if NK_TARGET_SME
|
|
39
39
|
|
|
40
40
|
#include "numkong/types.h"
|
|
@@ -1319,5 +1319,5 @@ NK_PUBLIC void nk_dots_packed_f64_smef64(nk_f64_t const *a, void const *b_packed
|
|
|
1319
1319
|
#endif
|
|
1320
1320
|
|
|
1321
1321
|
#endif // NK_TARGET_SME
|
|
1322
|
-
#endif //
|
|
1322
|
+
#endif // NK_TARGET_ARM64_
|
|
1323
1323
|
#endif // NK_DOTS_SMEF64_H
|
|
@@ -26,7 +26,7 @@
|
|
|
26
26
|
#ifndef NK_DOTS_SMEHALF_H
|
|
27
27
|
#define NK_DOTS_SMEHALF_H
|
|
28
28
|
|
|
29
|
-
#if
|
|
29
|
+
#if NK_TARGET_ARM64_
|
|
30
30
|
#if NK_TARGET_SMEHALF
|
|
31
31
|
|
|
32
32
|
#if defined(__cplusplus)
|
|
@@ -42,6 +42,6 @@ extern "C" {
|
|
|
42
42
|
#endif
|
|
43
43
|
|
|
44
44
|
#endif // NK_TARGET_SMEHALF
|
|
45
|
-
#endif //
|
|
45
|
+
#endif // NK_TARGET_ARM64_
|
|
46
46
|
|
|
47
47
|
#endif // NK_DOTS_SMEHALF_H
|
|
@@ -22,7 +22,7 @@
|
|
|
22
22
|
#ifndef NK_EACH_HASWELL_H
|
|
23
23
|
#define NK_EACH_HASWELL_H
|
|
24
24
|
|
|
25
|
-
#if
|
|
25
|
+
#if NK_TARGET_X8664_
|
|
26
26
|
#if NK_TARGET_HASWELL
|
|
27
27
|
|
|
28
28
|
#include "numkong/types.h"
|
|
@@ -1654,5 +1654,5 @@ NK_PUBLIC void nk_each_fma_f64c_haswell(nk_f64c_t const *a, nk_f64c_t const *b,
|
|
|
1654
1654
|
#endif
|
|
1655
1655
|
|
|
1656
1656
|
#endif // NK_TARGET_HASWELL
|
|
1657
|
-
#endif //
|
|
1657
|
+
#endif // NK_TARGET_X8664_
|
|
1658
1658
|
#endif // NK_EACH_HASWELL_H
|
|
@@ -23,7 +23,7 @@
|
|
|
23
23
|
#ifndef NK_EACH_ICELAKE_H
|
|
24
24
|
#define NK_EACH_ICELAKE_H
|
|
25
25
|
|
|
26
|
-
#if
|
|
26
|
+
#if NK_TARGET_X8664_
|
|
27
27
|
#if NK_TARGET_ICELAKE
|
|
28
28
|
|
|
29
29
|
#include "numkong/types.h"
|
|
@@ -268,5 +268,5 @@ nk_each_sum_u64_icelake_cycle:
|
|
|
268
268
|
#endif
|
|
269
269
|
|
|
270
270
|
#endif // NK_TARGET_ICELAKE
|
|
271
|
-
#endif //
|
|
271
|
+
#endif // NK_TARGET_X8664_
|
|
272
272
|
#endif // NK_EACH_ICELAKE_H
|
|
@@ -31,7 +31,7 @@
|
|
|
31
31
|
#ifndef NK_EACH_NEON_H
|
|
32
32
|
#define NK_EACH_NEON_H
|
|
33
33
|
|
|
34
|
-
#if
|
|
34
|
+
#if NK_TARGET_ARM64_
|
|
35
35
|
#if NK_TARGET_NEON
|
|
36
36
|
|
|
37
37
|
#include "numkong/types.h"
|
|
@@ -1134,5 +1134,5 @@ NK_PUBLIC void nk_each_sum_i8_neon(nk_i8_t const *a, nk_i8_t const *b, nk_size_t
|
|
|
1134
1134
|
#endif
|
|
1135
1135
|
|
|
1136
1136
|
#endif // NK_TARGET_NEON
|
|
1137
|
-
#endif //
|
|
1137
|
+
#endif // NK_TARGET_ARM64_
|
|
1138
1138
|
#endif // NK_EACH_NEON_H
|
|
@@ -31,7 +31,7 @@
|
|
|
31
31
|
#ifndef NK_EACH_NEONBFDOT_H
|
|
32
32
|
#define NK_EACH_NEONBFDOT_H
|
|
33
33
|
|
|
34
|
-
#if
|
|
34
|
+
#if NK_TARGET_ARM64_
|
|
35
35
|
#if NK_TARGET_NEONBFDOT
|
|
36
36
|
|
|
37
37
|
#include "numkong/types.h"
|
|
@@ -207,5 +207,5 @@ NK_PUBLIC void nk_each_fma_bf16_neonbfdot( //
|
|
|
207
207
|
#endif
|
|
208
208
|
|
|
209
209
|
#endif // NK_TARGET_NEONBFDOT
|
|
210
|
-
#endif //
|
|
210
|
+
#endif // NK_TARGET_ARM64_
|
|
211
211
|
#endif // NK_EACH_NEONBFDOT_H
|
|
@@ -40,7 +40,7 @@
|
|
|
40
40
|
#ifndef NK_EACH_NEONHALF_H
|
|
41
41
|
#define NK_EACH_NEONHALF_H
|
|
42
42
|
|
|
43
|
-
#if
|
|
43
|
+
#if NK_TARGET_ARM64_
|
|
44
44
|
#if NK_TARGET_NEONHALF
|
|
45
45
|
|
|
46
46
|
#include "numkong/types.h"
|
|
@@ -314,5 +314,5 @@ NK_PUBLIC void nk_each_blend_i8_neonhalf( //
|
|
|
314
314
|
#endif
|
|
315
315
|
|
|
316
316
|
#endif // NK_TARGET_NEONHALF
|
|
317
|
-
#endif //
|
|
317
|
+
#endif // NK_TARGET_ARM64_
|
|
318
318
|
#endif // NK_EACH_NEONHALF_H
|
|
@@ -9,7 +9,7 @@
|
|
|
9
9
|
#ifndef NK_EACH_RVV_H
|
|
10
10
|
#define NK_EACH_RVV_H
|
|
11
11
|
|
|
12
|
-
#if
|
|
12
|
+
#if NK_TARGET_RISCV64_
|
|
13
13
|
#if NK_TARGET_RVV
|
|
14
14
|
|
|
15
15
|
#include "numkong/types.h"
|
|
@@ -1117,5 +1117,5 @@ NK_PUBLIC void nk_each_fma_f64c_rvv(nk_f64c_t const *a, nk_f64c_t const *b, nk_f
|
|
|
1117
1117
|
#endif
|
|
1118
1118
|
|
|
1119
1119
|
#endif // NK_TARGET_RVV
|
|
1120
|
-
#endif //
|
|
1120
|
+
#endif // NK_TARGET_RISCV64_
|
|
1121
1121
|
#endif // NK_EACH_RVV_H
|
|
@@ -24,7 +24,7 @@
|
|
|
24
24
|
#ifndef NK_EACH_SAPPHIRE_H
|
|
25
25
|
#define NK_EACH_SAPPHIRE_H
|
|
26
26
|
|
|
27
|
-
#if
|
|
27
|
+
#if NK_TARGET_X8664_
|
|
28
28
|
#if NK_TARGET_SAPPHIRE
|
|
29
29
|
|
|
30
30
|
#include "numkong/types.h"
|
|
@@ -339,5 +339,5 @@ nk_each_sum_e4m3_sapphire_cycle:
|
|
|
339
339
|
#endif
|
|
340
340
|
|
|
341
341
|
#endif // NK_TARGET_SAPPHIRE
|
|
342
|
-
#endif //
|
|
342
|
+
#endif // NK_TARGET_X8664_
|
|
343
343
|
#endif // NK_EACH_SAPPHIRE_H
|
|
@@ -23,7 +23,7 @@
|
|
|
23
23
|
#ifndef NK_EACH_SKYLAKE_H
|
|
24
24
|
#define NK_EACH_SKYLAKE_H
|
|
25
25
|
|
|
26
|
-
#if
|
|
26
|
+
#if NK_TARGET_X8664_
|
|
27
27
|
#if NK_TARGET_SKYLAKE
|
|
28
28
|
|
|
29
29
|
#include "numkong/types.h"
|
|
@@ -1558,5 +1558,5 @@ nk_each_fma_f16_skylake_cycle:
|
|
|
1558
1558
|
#endif
|
|
1559
1559
|
|
|
1560
1560
|
#endif // NK_TARGET_SKYLAKE
|
|
1561
|
-
#endif //
|
|
1561
|
+
#endif // NK_TARGET_X8664_
|
|
1562
1562
|
#endif // NK_EACH_SKYLAKE_H
|
|
@@ -20,7 +20,7 @@
|
|
|
20
20
|
#ifndef NK_GEOSPATIAL_HASWELL_H
|
|
21
21
|
#define NK_GEOSPATIAL_HASWELL_H
|
|
22
22
|
|
|
23
|
-
#if
|
|
23
|
+
#if NK_TARGET_X8664_
|
|
24
24
|
#if NK_TARGET_HASWELL
|
|
25
25
|
|
|
26
26
|
#include "numkong/types.h"
|
|
@@ -629,5 +629,5 @@ NK_PUBLIC void nk_vincenty_f32_haswell( //
|
|
|
629
629
|
#endif
|
|
630
630
|
|
|
631
631
|
#endif // NK_TARGET_HASWELL
|
|
632
|
-
#endif //
|
|
632
|
+
#endif // NK_TARGET_X8664_
|
|
633
633
|
#endif // NK_GEOSPATIAL_HASWELL_H
|
|
@@ -17,7 +17,7 @@
|
|
|
17
17
|
#ifndef NK_GEOSPATIAL_NEON_H
|
|
18
18
|
#define NK_GEOSPATIAL_NEON_H
|
|
19
19
|
|
|
20
|
-
#if
|
|
20
|
+
#if NK_TARGET_ARM64_
|
|
21
21
|
#if NK_TARGET_NEON
|
|
22
22
|
|
|
23
23
|
#include "numkong/types.h"
|
|
@@ -611,5 +611,5 @@ NK_PUBLIC void nk_vincenty_f32_neon( //
|
|
|
611
611
|
#endif
|
|
612
612
|
|
|
613
613
|
#endif // NK_TARGET_NEON
|
|
614
|
-
#endif //
|
|
614
|
+
#endif // NK_TARGET_ARM64_
|
|
615
615
|
#endif // NK_GEOSPATIAL_NEON_H
|
|
@@ -33,7 +33,7 @@
|
|
|
33
33
|
#ifndef NK_GEOSPATIAL_RVV_H
|
|
34
34
|
#define NK_GEOSPATIAL_RVV_H
|
|
35
35
|
|
|
36
|
-
#if
|
|
36
|
+
#if NK_TARGET_RISCV64_
|
|
37
37
|
#if NK_TARGET_RVV
|
|
38
38
|
|
|
39
39
|
#include "numkong/types.h"
|
|
@@ -697,5 +697,5 @@ NK_PUBLIC void nk_vincenty_f32_rvv( //
|
|
|
697
697
|
#endif
|
|
698
698
|
|
|
699
699
|
#endif // NK_TARGET_RVV
|
|
700
|
-
#endif //
|
|
700
|
+
#endif // NK_TARGET_RISCV64_
|
|
701
701
|
#endif // NK_GEOSPATIAL_RVV_H
|
|
@@ -20,7 +20,7 @@
|
|
|
20
20
|
#ifndef NK_GEOSPATIAL_SKYLAKE_H
|
|
21
21
|
#define NK_GEOSPATIAL_SKYLAKE_H
|
|
22
22
|
|
|
23
|
-
#if
|
|
23
|
+
#if NK_TARGET_X8664_
|
|
24
24
|
#if NK_TARGET_SKYLAKE
|
|
25
25
|
|
|
26
26
|
#include "numkong/types.h"
|
|
@@ -609,5 +609,5 @@ NK_PUBLIC void nk_vincenty_f32_skylake( //
|
|
|
609
609
|
#endif
|
|
610
610
|
|
|
611
611
|
#endif // NK_TARGET_SKYLAKE
|
|
612
|
-
#endif //
|
|
612
|
+
#endif // NK_TARGET_X8664_
|
|
613
613
|
#endif // NK_GEOSPATIAL_SKYLAKE_H
|
|
@@ -17,7 +17,7 @@
|
|
|
17
17
|
#ifndef NK_MAXSIM_ALDER_H
|
|
18
18
|
#define NK_MAXSIM_ALDER_H
|
|
19
19
|
|
|
20
|
-
#if
|
|
20
|
+
#if NK_TARGET_X8664_
|
|
21
21
|
#if NK_TARGET_ALDER
|
|
22
22
|
|
|
23
23
|
#include "numkong/types.h"
|
|
@@ -507,5 +507,5 @@ NK_PUBLIC void nk_maxsim_packed_f16_alder( //
|
|
|
507
507
|
#endif
|
|
508
508
|
|
|
509
509
|
#endif // NK_TARGET_ALDER
|
|
510
|
-
#endif //
|
|
510
|
+
#endif // NK_TARGET_X8664_
|
|
511
511
|
#endif // NK_MAXSIM_ALDER_H
|
|
@@ -15,7 +15,7 @@
|
|
|
15
15
|
#ifndef NK_MAXSIM_GENOA_H
|
|
16
16
|
#define NK_MAXSIM_GENOA_H
|
|
17
17
|
|
|
18
|
-
#if
|
|
18
|
+
#if NK_TARGET_X8664_
|
|
19
19
|
#if NK_TARGET_GENOA
|
|
20
20
|
|
|
21
21
|
#include "numkong/types.h"
|
|
@@ -111,5 +111,5 @@ NK_PUBLIC void nk_maxsim_packed_bf16_genoa( //
|
|
|
111
111
|
#endif
|
|
112
112
|
|
|
113
113
|
#endif // NK_TARGET_GENOA
|
|
114
|
-
#endif //
|
|
114
|
+
#endif // NK_TARGET_X8664_
|
|
115
115
|
#endif // NK_MAXSIM_GENOA_H
|
|
@@ -16,7 +16,7 @@
|
|
|
16
16
|
#ifndef NK_MAXSIM_HASWELL_H
|
|
17
17
|
#define NK_MAXSIM_HASWELL_H
|
|
18
18
|
|
|
19
|
-
#if
|
|
19
|
+
#if NK_TARGET_X8664_
|
|
20
20
|
#if NK_TARGET_HASWELL
|
|
21
21
|
|
|
22
22
|
#include "numkong/types.h"
|
|
@@ -549,5 +549,5 @@ NK_PUBLIC void nk_maxsim_packed_f16_haswell( //
|
|
|
549
549
|
#endif
|
|
550
550
|
|
|
551
551
|
#endif // NK_TARGET_HASWELL
|
|
552
|
-
#endif //
|
|
552
|
+
#endif // NK_TARGET_X8664_
|
|
553
553
|
#endif // NK_MAXSIM_HASWELL_H
|
|
@@ -22,7 +22,7 @@
|
|
|
22
22
|
#ifndef NK_MAXSIM_ICELAKE_H
|
|
23
23
|
#define NK_MAXSIM_ICELAKE_H
|
|
24
24
|
|
|
25
|
-
#if
|
|
25
|
+
#if NK_TARGET_X8664_
|
|
26
26
|
#if NK_TARGET_ICELAKE
|
|
27
27
|
|
|
28
28
|
#include "numkong/types.h"
|
|
@@ -476,5 +476,5 @@ NK_PUBLIC void nk_maxsim_packed_f16_icelake( //
|
|
|
476
476
|
#endif
|
|
477
477
|
|
|
478
478
|
#endif // NK_TARGET_ICELAKE
|
|
479
|
-
#endif //
|
|
479
|
+
#endif // NK_TARGET_X8664_
|
|
480
480
|
#endif // NK_MAXSIM_ICELAKE_H
|
|
@@ -13,7 +13,7 @@
|
|
|
13
13
|
#ifndef NK_MAXSIM_NEONSDOT_H
|
|
14
14
|
#define NK_MAXSIM_NEONSDOT_H
|
|
15
15
|
|
|
16
|
-
#if
|
|
16
|
+
#if NK_TARGET_ARM64_
|
|
17
17
|
#if NK_TARGET_NEONSDOT
|
|
18
18
|
|
|
19
19
|
#include "numkong/types.h"
|
|
@@ -390,5 +390,5 @@ NK_PUBLIC void nk_maxsim_packed_f16_neonsdot( //
|
|
|
390
390
|
#endif
|
|
391
391
|
|
|
392
392
|
#endif // NK_TARGET_NEONSDOT
|
|
393
|
-
#endif //
|
|
393
|
+
#endif // NK_TARGET_ARM64_
|
|
394
394
|
#endif // NK_MAXSIM_NEONSDOT_H
|
|
@@ -45,7 +45,7 @@
|
|
|
45
45
|
#ifndef NK_MAXSIM_SAPPHIREAMX_H
|
|
46
46
|
#define NK_MAXSIM_SAPPHIREAMX_H
|
|
47
47
|
|
|
48
|
-
#if
|
|
48
|
+
#if NK_TARGET_X8664_
|
|
49
49
|
#if NK_TARGET_SAPPHIREAMX
|
|
50
50
|
|
|
51
51
|
#include "numkong/types.h"
|
|
@@ -873,5 +873,5 @@ NK_PUBLIC void nk_maxsim_packed_bf16_sapphireamx( //
|
|
|
873
873
|
#endif
|
|
874
874
|
|
|
875
875
|
#endif // NK_TARGET_SAPPHIREAMX
|
|
876
|
-
#endif //
|
|
876
|
+
#endif // NK_TARGET_X8664_
|
|
877
877
|
#endif // NK_MAXSIM_SAPPHIREAMX_H
|
|
@@ -43,7 +43,7 @@
|
|
|
43
43
|
#ifndef NK_MAXSIM_SME_H
|
|
44
44
|
#define NK_MAXSIM_SME_H
|
|
45
45
|
|
|
46
|
-
#if
|
|
46
|
+
#if NK_TARGET_ARM64_
|
|
47
47
|
#if NK_TARGET_SME
|
|
48
48
|
|
|
49
49
|
#include "numkong/dots/sme.h" // nk_dots_sme_packed_header_t, nk_dots_pack_{f16,bf16}_sme, nk_dots_packed_size_{f16,bf16}_sme
|
|
@@ -950,5 +950,5 @@ NK_PUBLIC void nk_maxsim_packed_f32_sme( //
|
|
|
950
950
|
#endif
|
|
951
951
|
|
|
952
952
|
#endif // NK_TARGET_SME
|
|
953
|
-
#endif //
|
|
953
|
+
#endif // NK_TARGET_ARM64_
|
|
954
954
|
#endif // NK_MAXSIM_SME_H
|
|
@@ -22,7 +22,7 @@
|
|
|
22
22
|
#ifndef NK_MESH_HASWELL_H
|
|
23
23
|
#define NK_MESH_HASWELL_H
|
|
24
24
|
|
|
25
|
-
#if
|
|
25
|
+
#if NK_TARGET_X8664_
|
|
26
26
|
#if NK_TARGET_HASWELL
|
|
27
27
|
|
|
28
28
|
#include "numkong/types.h"
|
|
@@ -2106,5 +2106,5 @@ NK_PUBLIC void nk_umeyama_bf16_haswell(nk_bf16_t const *a, nk_bf16_t const *b, n
|
|
|
2106
2106
|
#endif
|
|
2107
2107
|
|
|
2108
2108
|
#endif // NK_TARGET_HASWELL
|
|
2109
|
-
#endif //
|
|
2109
|
+
#endif // NK_TARGET_X8664_
|
|
2110
2110
|
#endif // NK_MESH_HASWELL_H
|
|
@@ -27,7 +27,7 @@
|
|
|
27
27
|
#ifndef NK_MESH_NEON_H
|
|
28
28
|
#define NK_MESH_NEON_H
|
|
29
29
|
|
|
30
|
-
#if
|
|
30
|
+
#if NK_TARGET_ARM64_
|
|
31
31
|
#if NK_TARGET_NEON
|
|
32
32
|
|
|
33
33
|
#include "numkong/types.h"
|
|
@@ -1906,5 +1906,5 @@ NK_PUBLIC void nk_umeyama_f16_neon(nk_f16_t const *a, nk_f16_t const *b, nk_size
|
|
|
1906
1906
|
#endif
|
|
1907
1907
|
|
|
1908
1908
|
#endif // NK_TARGET_NEON
|
|
1909
|
-
#endif //
|
|
1909
|
+
#endif // NK_TARGET_ARM64_
|
|
1910
1910
|
#endif // NK_MESH_NEON_H
|
|
@@ -30,7 +30,7 @@
|
|
|
30
30
|
#ifndef NK_MESH_NEONBFDOT_H
|
|
31
31
|
#define NK_MESH_NEONBFDOT_H
|
|
32
32
|
|
|
33
|
-
#if
|
|
33
|
+
#if NK_TARGET_ARM64_
|
|
34
34
|
#if NK_TARGET_NEONBFDOT
|
|
35
35
|
|
|
36
36
|
#include "numkong/types.h"
|
|
@@ -806,5 +806,5 @@ NK_PUBLIC void nk_umeyama_bf16_neonbfdot(nk_bf16_t const *a, nk_bf16_t const *b,
|
|
|
806
806
|
#endif
|
|
807
807
|
|
|
808
808
|
#endif // NK_TARGET_NEONBFDOT
|
|
809
|
-
#endif //
|
|
809
|
+
#endif // NK_TARGET_ARM64_
|
|
810
810
|
#endif // NK_MESH_NEONBFDOT_H
|
|
@@ -38,7 +38,7 @@
|
|
|
38
38
|
#ifndef NK_MESH_RVV_H
|
|
39
39
|
#define NK_MESH_RVV_H
|
|
40
40
|
|
|
41
|
-
#if
|
|
41
|
+
#if NK_TARGET_RISCV64_
|
|
42
42
|
#if NK_TARGET_RVV
|
|
43
43
|
|
|
44
44
|
#include "numkong/types.h"
|
|
@@ -1007,5 +1007,5 @@ NK_PUBLIC void nk_umeyama_bf16_rvv(nk_bf16_t const *a, nk_bf16_t const *b, nk_si
|
|
|
1007
1007
|
#endif
|
|
1008
1008
|
|
|
1009
1009
|
#endif // NK_TARGET_RVV
|
|
1010
|
-
#endif //
|
|
1010
|
+
#endif // NK_TARGET_RISCV64_
|
|
1011
1011
|
#endif // NK_MESH_RVV_H
|
|
@@ -21,7 +21,7 @@
|
|
|
21
21
|
#ifndef NK_MESH_SKYLAKE_H
|
|
22
22
|
#define NK_MESH_SKYLAKE_H
|
|
23
23
|
|
|
24
|
-
#if
|
|
24
|
+
#if NK_TARGET_X8664_
|
|
25
25
|
#if NK_TARGET_SKYLAKE
|
|
26
26
|
|
|
27
27
|
#include "numkong/types.h"
|
|
@@ -2456,5 +2456,5 @@ NK_PUBLIC void nk_umeyama_bf16_skylake(nk_bf16_t const *a, nk_bf16_t const *b, n
|
|
|
2456
2456
|
#endif
|
|
2457
2457
|
|
|
2458
2458
|
#endif // NK_TARGET_SKYLAKE
|
|
2459
|
-
#endif //
|
|
2459
|
+
#endif // NK_TARGET_X8664_
|
|
2460
2460
|
#endif // NK_MESH_SKYLAKE_H
|