numkong 7.4.2 → 7.4.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +84 -84
- package/c/numkong.c +1 -1
- package/include/numkong/attention/sapphireamx.h +2 -2
- package/include/numkong/attention/sme.h +2 -2
- package/include/numkong/capabilities.h +47 -47
- package/include/numkong/cast/diamond.h +2 -2
- package/include/numkong/cast/haswell.h +2 -2
- package/include/numkong/cast/icelake.h +2 -2
- package/include/numkong/cast/loongsonasx.h +2 -2
- package/include/numkong/cast/neon.h +2 -2
- package/include/numkong/cast/powervsx.h +2 -2
- package/include/numkong/cast/rvv.h +2 -2
- package/include/numkong/cast/sapphire.h +2 -2
- package/include/numkong/cast/skylake.h +2 -2
- package/include/numkong/curved/genoa.h +2 -2
- package/include/numkong/curved/haswell.h +2 -2
- package/include/numkong/curved/neon.h +2 -2
- package/include/numkong/curved/neonbfdot.h +2 -2
- package/include/numkong/curved/rvv.h +2 -2
- package/include/numkong/curved/skylake.h +2 -2
- package/include/numkong/curved/smef64.h +2 -2
- package/include/numkong/dot/alder.h +2 -2
- package/include/numkong/dot/diamond.h +2 -2
- package/include/numkong/dot/genoa.h +2 -2
- package/include/numkong/dot/haswell.h +2 -2
- package/include/numkong/dot/icelake.h +2 -2
- package/include/numkong/dot/loongsonasx.h +2 -2
- package/include/numkong/dot/neon.h +2 -2
- package/include/numkong/dot/neonbfdot.h +2 -2
- package/include/numkong/dot/neonfhm.h +2 -2
- package/include/numkong/dot/neonfp8.h +2 -2
- package/include/numkong/dot/neonsdot.h +2 -2
- package/include/numkong/dot/rvv.h +2 -2
- package/include/numkong/dot/rvvbb.h +2 -2
- package/include/numkong/dot/rvvbf16.h +2 -2
- package/include/numkong/dot/rvvhalf.h +2 -2
- package/include/numkong/dot/sapphire.h +2 -2
- package/include/numkong/dot/sierra.h +2 -2
- package/include/numkong/dot/skylake.h +2 -2
- package/include/numkong/dot/sve.h +2 -2
- package/include/numkong/dot/svebfdot.h +2 -2
- package/include/numkong/dot/svehalf.h +2 -2
- package/include/numkong/dot/svesdot.h +2 -2
- package/include/numkong/dots/alder.h +2 -2
- package/include/numkong/dots/diamond.h +2 -2
- package/include/numkong/dots/genoa.h +2 -2
- package/include/numkong/dots/haswell.h +2 -2
- package/include/numkong/dots/icelake.h +2 -2
- package/include/numkong/dots/loongsonasx.h +2 -2
- package/include/numkong/dots/neon.h +2 -2
- package/include/numkong/dots/neonbfdot.h +2 -2
- package/include/numkong/dots/neonfhm.h +2 -2
- package/include/numkong/dots/neonfp8.h +2 -2
- package/include/numkong/dots/neonsdot.h +2 -2
- package/include/numkong/dots/powervsx.h +2 -2
- package/include/numkong/dots/rvv.h +2 -2
- package/include/numkong/dots/sapphireamx.h +2 -2
- package/include/numkong/dots/sierra.h +2 -2
- package/include/numkong/dots/skylake.h +2 -2
- package/include/numkong/dots/sme.h +10 -10
- package/include/numkong/dots/smebi32.h +2 -2
- package/include/numkong/dots/smef64.h +2 -2
- package/include/numkong/dots/smehalf.h +2 -2
- package/include/numkong/each/haswell.h +2 -2
- package/include/numkong/each/icelake.h +2 -2
- package/include/numkong/each/neon.h +2 -2
- package/include/numkong/each/neonbfdot.h +2 -2
- package/include/numkong/each/neonhalf.h +2 -2
- package/include/numkong/each/rvv.h +2 -2
- package/include/numkong/each/sapphire.h +2 -2
- package/include/numkong/each/skylake.h +2 -2
- package/include/numkong/geospatial/haswell.h +2 -2
- package/include/numkong/geospatial/neon.h +2 -2
- package/include/numkong/geospatial/rvv.h +2 -2
- package/include/numkong/geospatial/skylake.h +2 -2
- package/include/numkong/maxsim/alder.h +2 -2
- package/include/numkong/maxsim/genoa.h +2 -2
- package/include/numkong/maxsim/haswell.h +2 -2
- package/include/numkong/maxsim/icelake.h +2 -2
- package/include/numkong/maxsim/neonsdot.h +2 -2
- package/include/numkong/maxsim/sapphireamx.h +2 -2
- package/include/numkong/maxsim/sme.h +2 -2
- package/include/numkong/mesh/haswell.h +2 -2
- package/include/numkong/mesh/neon.h +2 -2
- package/include/numkong/mesh/neonbfdot.h +2 -2
- package/include/numkong/mesh/rvv.h +2 -2
- package/include/numkong/mesh/skylake.h +2 -2
- package/include/numkong/numkong.h +1 -1
- package/include/numkong/probability/haswell.h +2 -2
- package/include/numkong/probability/neon.h +2 -2
- package/include/numkong/probability/rvv.h +2 -2
- package/include/numkong/probability/skylake.h +2 -2
- package/include/numkong/reduce/alder.h +2 -2
- package/include/numkong/reduce/genoa.h +2 -2
- package/include/numkong/reduce/haswell.h +2 -2
- package/include/numkong/reduce/icelake.h +2 -2
- package/include/numkong/reduce/neon.h +2 -2
- package/include/numkong/reduce/neonbfdot.h +2 -2
- package/include/numkong/reduce/neonfhm.h +2 -2
- package/include/numkong/reduce/neonsdot.h +2 -2
- package/include/numkong/reduce/rvv.h +2 -2
- package/include/numkong/reduce/sierra.h +2 -2
- package/include/numkong/reduce/skylake.h +2 -2
- package/include/numkong/scalar/haswell.h +2 -2
- package/include/numkong/scalar/loongsonasx.h +2 -2
- package/include/numkong/scalar/neon.h +2 -2
- package/include/numkong/scalar/neonhalf.h +2 -2
- package/include/numkong/scalar/powervsx.h +2 -2
- package/include/numkong/scalar/rvv.h +2 -2
- package/include/numkong/scalar/sapphire.h +2 -2
- package/include/numkong/set/haswell.h +2 -2
- package/include/numkong/set/icelake.h +2 -2
- package/include/numkong/set/loongsonasx.h +2 -2
- package/include/numkong/set/neon.h +2 -2
- package/include/numkong/set/powervsx.h +2 -2
- package/include/numkong/set/rvv.h +2 -2
- package/include/numkong/set/rvvbb.h +2 -2
- package/include/numkong/set/sve.h +2 -2
- package/include/numkong/sets/haswell.h +2 -2
- package/include/numkong/sets/icelake.h +2 -2
- package/include/numkong/sets/loongsonasx.h +2 -2
- package/include/numkong/sets/neon.h +2 -2
- package/include/numkong/sets/powervsx.h +2 -2
- package/include/numkong/sets/smebi32.h +2 -2
- package/include/numkong/sparse/icelake.h +2 -2
- package/include/numkong/sparse/neon.h +2 -2
- package/include/numkong/sparse/sve2.h +2 -2
- package/include/numkong/sparse/turin.h +2 -2
- package/include/numkong/spatial/alder.h +2 -2
- package/include/numkong/spatial/diamond.h +2 -2
- package/include/numkong/spatial/genoa.h +2 -2
- package/include/numkong/spatial/haswell.h +2 -2
- package/include/numkong/spatial/icelake.h +2 -2
- package/include/numkong/spatial/loongsonasx.h +2 -2
- package/include/numkong/spatial/neon.h +2 -2
- package/include/numkong/spatial/neonbfdot.h +2 -2
- package/include/numkong/spatial/neonfp8.h +2 -2
- package/include/numkong/spatial/neonsdot.h +2 -2
- package/include/numkong/spatial/powervsx.h +2 -2
- package/include/numkong/spatial/rvv.h +2 -2
- package/include/numkong/spatial/rvvbf16.h +2 -2
- package/include/numkong/spatial/rvvhalf.h +2 -2
- package/include/numkong/spatial/sierra.h +2 -2
- package/include/numkong/spatial/skylake.h +2 -2
- package/include/numkong/spatial/sve.h +2 -2
- package/include/numkong/spatial/svebfdot.h +2 -2
- package/include/numkong/spatial/svehalf.h +2 -2
- package/include/numkong/spatial/svesdot.h +2 -2
- package/include/numkong/spatials/alder.h +2 -2
- package/include/numkong/spatials/diamond.h +2 -2
- package/include/numkong/spatials/genoa.h +2 -2
- package/include/numkong/spatials/haswell.h +2 -2
- package/include/numkong/spatials/icelake.h +2 -2
- package/include/numkong/spatials/loongsonasx.h +2 -2
- package/include/numkong/spatials/neon.h +2 -2
- package/include/numkong/spatials/neonbfdot.h +2 -2
- package/include/numkong/spatials/neonfhm.h +2 -2
- package/include/numkong/spatials/neonfp8.h +2 -2
- package/include/numkong/spatials/neonsdot.h +2 -2
- package/include/numkong/spatials/powervsx.h +2 -2
- package/include/numkong/spatials/rvv.h +2 -2
- package/include/numkong/spatials/sapphireamx.h +2 -2
- package/include/numkong/spatials/sierra.h +2 -2
- package/include/numkong/spatials/skylake.h +2 -2
- package/include/numkong/spatials/sme.h +2 -2
- package/include/numkong/spatials/smef64.h +2 -2
- package/include/numkong/trigonometry/haswell.h +2 -2
- package/include/numkong/trigonometry/neon.h +2 -2
- package/include/numkong/trigonometry/rvv.h +2 -2
- package/include/numkong/trigonometry/skylake.h +2 -2
- package/include/numkong/types.h +88 -80
- package/package.json +7 -7
|
@@ -9,7 +9,7 @@
|
|
|
9
9
|
#ifndef NK_SPARSE_TURIN_H
|
|
10
10
|
#define NK_SPARSE_TURIN_H
|
|
11
11
|
|
|
12
|
-
#if
|
|
12
|
+
#if NK_TARGET_X8664_
|
|
13
13
|
#if NK_TARGET_TURIN
|
|
14
14
|
|
|
15
15
|
#include "numkong/types.h"
|
|
@@ -318,5 +318,5 @@ NK_PUBLIC void nk_sparse_dot_u32f32_turin( //
|
|
|
318
318
|
#endif
|
|
319
319
|
|
|
320
320
|
#endif // NK_TARGET_TURIN
|
|
321
|
-
#endif //
|
|
321
|
+
#endif // NK_TARGET_X8664_
|
|
322
322
|
#endif // NK_SPARSE_TURIN_H
|
|
@@ -24,7 +24,7 @@
|
|
|
24
24
|
#ifndef NK_SPATIAL_ALDER_H
|
|
25
25
|
#define NK_SPATIAL_ALDER_H
|
|
26
26
|
|
|
27
|
-
#if
|
|
27
|
+
#if NK_TARGET_X8664_
|
|
28
28
|
#if NK_TARGET_ALDER
|
|
29
29
|
|
|
30
30
|
#include "numkong/types.h"
|
|
@@ -600,5 +600,5 @@ NK_PUBLIC void nk_euclidean_e3m2_alder(nk_e3m2_t const *a, nk_e3m2_t const *b, n
|
|
|
600
600
|
#endif
|
|
601
601
|
|
|
602
602
|
#endif // NK_TARGET_ALDER
|
|
603
|
-
#endif //
|
|
603
|
+
#endif // NK_TARGET_X8664_
|
|
604
604
|
#endif // NK_SPATIAL_ALDER_H
|
|
@@ -13,7 +13,7 @@
|
|
|
13
13
|
#ifndef NK_SPATIAL_DIAMOND_H
|
|
14
14
|
#define NK_SPATIAL_DIAMOND_H
|
|
15
15
|
|
|
16
|
-
#if
|
|
16
|
+
#if NK_TARGET_X8664_
|
|
17
17
|
#if NK_TARGET_DIAMOND
|
|
18
18
|
|
|
19
19
|
#include "numkong/types.h"
|
|
@@ -236,5 +236,5 @@ nk_angular_f16_diamond_cycle:
|
|
|
236
236
|
#endif
|
|
237
237
|
|
|
238
238
|
#endif // NK_TARGET_DIAMOND
|
|
239
|
-
#endif //
|
|
239
|
+
#endif // NK_TARGET_X8664_
|
|
240
240
|
#endif // NK_SPATIAL_DIAMOND_H
|
|
@@ -9,7 +9,7 @@
|
|
|
9
9
|
#ifndef NK_SPATIAL_GENOA_H
|
|
10
10
|
#define NK_SPATIAL_GENOA_H
|
|
11
11
|
|
|
12
|
-
#if
|
|
12
|
+
#if NK_TARGET_X8664_
|
|
13
13
|
#if NK_TARGET_GENOA
|
|
14
14
|
|
|
15
15
|
#include "numkong/types.h"
|
|
@@ -218,5 +218,5 @@ nk_angular_e5m2_genoa_cycle:
|
|
|
218
218
|
#endif
|
|
219
219
|
|
|
220
220
|
#endif // NK_TARGET_GENOA
|
|
221
|
-
#endif //
|
|
221
|
+
#endif // NK_TARGET_X8664_
|
|
222
222
|
#endif // NK_SPATIAL_GENOA_H
|
|
@@ -24,7 +24,7 @@
|
|
|
24
24
|
#ifndef NK_SPATIAL_HASWELL_H
|
|
25
25
|
#define NK_SPATIAL_HASWELL_H
|
|
26
26
|
|
|
27
|
-
#if
|
|
27
|
+
#if NK_TARGET_X8664_
|
|
28
28
|
#if NK_TARGET_HASWELL
|
|
29
29
|
|
|
30
30
|
#include "numkong/types.h"
|
|
@@ -975,5 +975,5 @@ nk_angular_e5m2_haswell_cycle:
|
|
|
975
975
|
|
|
976
976
|
#pragma endregion FP8 Floats
|
|
977
977
|
#endif // NK_TARGET_HASWELL
|
|
978
|
-
#endif //
|
|
978
|
+
#endif // NK_TARGET_X8664_
|
|
979
979
|
#endif // NK_SPATIAL_HASWELL_H
|
|
@@ -19,7 +19,7 @@
|
|
|
19
19
|
#ifndef NK_SPATIAL_ICELAKE_H
|
|
20
20
|
#define NK_SPATIAL_ICELAKE_H
|
|
21
21
|
|
|
22
|
-
#if
|
|
22
|
+
#if NK_TARGET_X8664_
|
|
23
23
|
#if NK_TARGET_ICELAKE
|
|
24
24
|
|
|
25
25
|
#include "numkong/types.h"
|
|
@@ -1063,5 +1063,5 @@ nk_angular_e3m2_icelake_cycle:
|
|
|
1063
1063
|
#endif
|
|
1064
1064
|
|
|
1065
1065
|
#endif // NK_TARGET_ICELAKE
|
|
1066
|
-
#endif //
|
|
1066
|
+
#endif // NK_TARGET_X8664_
|
|
1067
1067
|
#endif // NK_SPATIAL_ICELAKE_H
|
|
@@ -20,7 +20,7 @@
|
|
|
20
20
|
#ifndef NK_SPATIAL_LOONGSONASX_H
|
|
21
21
|
#define NK_SPATIAL_LOONGSONASX_H
|
|
22
22
|
|
|
23
|
-
#if
|
|
23
|
+
#if NK_TARGET_LOONGARCH64_
|
|
24
24
|
#if NK_TARGET_LOONGSONASX
|
|
25
25
|
|
|
26
26
|
#include "numkong/types.h"
|
|
@@ -479,5 +479,5 @@ NK_PUBLIC void nk_angular_f16_loongsonasx(nk_f16_t const *a, nk_f16_t const *b,
|
|
|
479
479
|
#endif
|
|
480
480
|
|
|
481
481
|
#endif // NK_TARGET_LOONGSONASX
|
|
482
|
-
#endif //
|
|
482
|
+
#endif // NK_TARGET_LOONGARCH64_
|
|
483
483
|
#endif // NK_SPATIAL_LOONGSONASX_H
|
|
@@ -28,7 +28,7 @@
|
|
|
28
28
|
#ifndef NK_SPATIAL_NEON_H
|
|
29
29
|
#define NK_SPATIAL_NEON_H
|
|
30
30
|
|
|
31
|
-
#if
|
|
31
|
+
#if NK_TARGET_ARM64_
|
|
32
32
|
#if NK_TARGET_NEON
|
|
33
33
|
|
|
34
34
|
#include "numkong/types.h"
|
|
@@ -842,5 +842,5 @@ NK_INTERNAL void nk_euclidean_through_u32_from_dot_neon_(nk_b128_vec_t dots, nk_
|
|
|
842
842
|
|
|
843
843
|
#pragma endregion F16 and BF16 Floats
|
|
844
844
|
#endif // NK_TARGET_NEON
|
|
845
|
-
#endif //
|
|
845
|
+
#endif // NK_TARGET_ARM64_
|
|
846
846
|
#endif // NK_SPATIAL_NEON_H
|
|
@@ -28,7 +28,7 @@
|
|
|
28
28
|
#ifndef NK_SPATIAL_NEONBFDOT_H
|
|
29
29
|
#define NK_SPATIAL_NEONBFDOT_H
|
|
30
30
|
|
|
31
|
-
#if
|
|
31
|
+
#if NK_TARGET_ARM64_
|
|
32
32
|
#if NK_TARGET_NEONBFDOT
|
|
33
33
|
|
|
34
34
|
#include "numkong/types.h"
|
|
@@ -160,5 +160,5 @@ NK_PUBLIC void nk_euclidean_bf16_neonbfdot(nk_bf16_t const *a, nk_bf16_t const *
|
|
|
160
160
|
#endif
|
|
161
161
|
|
|
162
162
|
#endif // NK_TARGET_NEONBFDOT
|
|
163
|
-
#endif //
|
|
163
|
+
#endif // NK_TARGET_ARM64_
|
|
164
164
|
#endif // NK_SPATIAL_NEONBFDOT_H
|
|
@@ -13,7 +13,7 @@
|
|
|
13
13
|
#ifndef NK_SPATIAL_NEONFP8_H
|
|
14
14
|
#define NK_SPATIAL_NEONFP8_H
|
|
15
15
|
|
|
16
|
-
#if
|
|
16
|
+
#if NK_TARGET_ARM64_
|
|
17
17
|
#if NK_TARGET_NEONFP8
|
|
18
18
|
|
|
19
19
|
#include "numkong/types.h"
|
|
@@ -254,5 +254,5 @@ nk_angular_e3m2_neonfp8_cycle:
|
|
|
254
254
|
#endif
|
|
255
255
|
|
|
256
256
|
#endif // NK_TARGET_NEONFP8
|
|
257
|
-
#endif //
|
|
257
|
+
#endif // NK_TARGET_ARM64_
|
|
258
258
|
#endif // NK_SPATIAL_NEONFP8_H
|
|
@@ -29,7 +29,7 @@
|
|
|
29
29
|
#ifndef NK_SPATIAL_NEONSDOT_H
|
|
30
30
|
#define NK_SPATIAL_NEONSDOT_H
|
|
31
31
|
|
|
32
|
-
#if
|
|
32
|
+
#if NK_TARGET_ARM64_
|
|
33
33
|
#if NK_TARGET_NEONSDOT
|
|
34
34
|
|
|
35
35
|
#include "numkong/types.h"
|
|
@@ -425,5 +425,5 @@ nk_angular_u4_neonsdot_cycle:
|
|
|
425
425
|
#endif
|
|
426
426
|
|
|
427
427
|
#endif // NK_TARGET_NEONSDOT
|
|
428
|
-
#endif //
|
|
428
|
+
#endif // NK_TARGET_ARM64_
|
|
429
429
|
#endif // NK_SPATIAL_NEONSDOT_H
|
|
@@ -32,7 +32,7 @@
|
|
|
32
32
|
#ifndef NK_SPATIAL_POWERVSX_H
|
|
33
33
|
#define NK_SPATIAL_POWERVSX_H
|
|
34
34
|
|
|
35
|
-
#if
|
|
35
|
+
#if NK_TARGET_POWER64_
|
|
36
36
|
#if NK_TARGET_POWERVSX
|
|
37
37
|
|
|
38
38
|
#include "numkong/types.h"
|
|
@@ -734,5 +734,5 @@ NK_INTERNAL void nk_euclidean_through_u32_from_dot_powervsx_(nk_b128_vec_t dots,
|
|
|
734
734
|
#endif
|
|
735
735
|
|
|
736
736
|
#endif // NK_TARGET_POWERVSX
|
|
737
|
-
#endif //
|
|
737
|
+
#endif // NK_TARGET_POWER64_
|
|
738
738
|
#endif // NK_SPATIAL_POWERVSX_H
|
|
@@ -23,7 +23,7 @@
|
|
|
23
23
|
#ifndef NK_SPATIAL_RVV_H
|
|
24
24
|
#define NK_SPATIAL_RVV_H
|
|
25
25
|
|
|
26
|
-
#if
|
|
26
|
+
#if NK_TARGET_RISCV64_
|
|
27
27
|
#if NK_TARGET_RVV
|
|
28
28
|
|
|
29
29
|
#include "numkong/types.h"
|
|
@@ -987,5 +987,5 @@ NK_PUBLIC void nk_angular_u4_rvv(nk_u4x2_t const *a_scalars, nk_u4x2_t const *b_
|
|
|
987
987
|
|
|
988
988
|
#pragma endregion I8 and U8 Integers
|
|
989
989
|
#endif // NK_TARGET_RVV
|
|
990
|
-
#endif //
|
|
990
|
+
#endif // NK_TARGET_RISCV64_
|
|
991
991
|
#endif // NK_SPATIAL_RVV_H
|
|
@@ -17,7 +17,7 @@
|
|
|
17
17
|
#ifndef NK_SPATIAL_RVVBF16_H
|
|
18
18
|
#define NK_SPATIAL_RVVBF16_H
|
|
19
19
|
|
|
20
|
-
#if
|
|
20
|
+
#if NK_TARGET_RISCV64_
|
|
21
21
|
#if NK_TARGET_RVVBF16
|
|
22
22
|
|
|
23
23
|
#include "numkong/types.h"
|
|
@@ -124,5 +124,5 @@ NK_PUBLIC void nk_angular_bf16_rvvbf16(nk_bf16_t const *a_scalars, nk_bf16_t con
|
|
|
124
124
|
#endif
|
|
125
125
|
|
|
126
126
|
#endif // NK_TARGET_RVVBF16
|
|
127
|
-
#endif //
|
|
127
|
+
#endif // NK_TARGET_RISCV64_
|
|
128
128
|
#endif // NK_SPATIAL_RVVBF16_H
|
|
@@ -14,7 +14,7 @@
|
|
|
14
14
|
#ifndef NK_SPATIAL_RVVHALF_H
|
|
15
15
|
#define NK_SPATIAL_RVVHALF_H
|
|
16
16
|
|
|
17
|
-
#if
|
|
17
|
+
#if NK_TARGET_RISCV64_
|
|
18
18
|
#if NK_TARGET_RVVHALF
|
|
19
19
|
|
|
20
20
|
#include "numkong/types.h"
|
|
@@ -116,5 +116,5 @@ NK_PUBLIC void nk_angular_f16_rvvhalf(nk_f16_t const *a_scalars, nk_f16_t const
|
|
|
116
116
|
#endif
|
|
117
117
|
|
|
118
118
|
#endif // NK_TARGET_RVVHALF
|
|
119
|
-
#endif //
|
|
119
|
+
#endif // NK_TARGET_RISCV64_
|
|
120
120
|
#endif // NK_SPATIAL_RVVHALF_H
|
|
@@ -22,7 +22,7 @@
|
|
|
22
22
|
#ifndef NK_SPATIAL_SIERRA_H
|
|
23
23
|
#define NK_SPATIAL_SIERRA_H
|
|
24
24
|
|
|
25
|
-
#if
|
|
25
|
+
#if NK_TARGET_X8664_
|
|
26
26
|
#if NK_TARGET_SIERRA
|
|
27
27
|
|
|
28
28
|
#include "numkong/types.h"
|
|
@@ -512,5 +512,5 @@ nk_angular_e3m2_sierra_cycle:
|
|
|
512
512
|
#endif
|
|
513
513
|
|
|
514
514
|
#endif // NK_TARGET_SIERRA
|
|
515
|
-
#endif //
|
|
515
|
+
#endif // NK_TARGET_X8664_
|
|
516
516
|
#endif // NK_SPATIAL_SIERRA_H
|
|
@@ -21,7 +21,7 @@
|
|
|
21
21
|
#ifndef NK_SPATIAL_SKYLAKE_H
|
|
22
22
|
#define NK_SPATIAL_SKYLAKE_H
|
|
23
23
|
|
|
24
|
-
#if
|
|
24
|
+
#if NK_TARGET_X8664_
|
|
25
25
|
#if NK_TARGET_SKYLAKE
|
|
26
26
|
|
|
27
27
|
#include "numkong/types.h"
|
|
@@ -601,5 +601,5 @@ nk_angular_e3m2_skylake_cycle:
|
|
|
601
601
|
|
|
602
602
|
#pragma endregion F16 and BF16 Floats
|
|
603
603
|
#endif // NK_TARGET_SKYLAKE
|
|
604
|
-
#endif //
|
|
604
|
+
#endif // NK_TARGET_X8664_
|
|
605
605
|
#endif // NK_SPATIAL_SKYLAKE_H
|
|
@@ -32,7 +32,7 @@
|
|
|
32
32
|
#ifndef NK_SPATIAL_SVE_H
|
|
33
33
|
#define NK_SPATIAL_SVE_H
|
|
34
34
|
|
|
35
|
-
#if
|
|
35
|
+
#if NK_TARGET_ARM64_
|
|
36
36
|
#if NK_TARGET_SVE
|
|
37
37
|
|
|
38
38
|
#include "numkong/types.h"
|
|
@@ -241,5 +241,5 @@ NK_PUBLIC void nk_angular_f64_sve(nk_f64_t const *a, nk_f64_t const *b, nk_size_
|
|
|
241
241
|
#endif
|
|
242
242
|
|
|
243
243
|
#endif // NK_TARGET_SVE
|
|
244
|
-
#endif //
|
|
244
|
+
#endif // NK_TARGET_ARM64_
|
|
245
245
|
#endif // NK_SPATIAL_SVE_H
|
|
@@ -32,7 +32,7 @@
|
|
|
32
32
|
#ifndef NK_SPATIAL_SVEBFDOT_H
|
|
33
33
|
#define NK_SPATIAL_SVEBFDOT_H
|
|
34
34
|
|
|
35
|
-
#if
|
|
35
|
+
#if NK_TARGET_ARM64_
|
|
36
36
|
#if NK_TARGET_SVEBFDOT
|
|
37
37
|
|
|
38
38
|
#include "numkong/types.h"
|
|
@@ -118,5 +118,5 @@ NK_PUBLIC void nk_angular_bf16_svebfdot(nk_bf16_t const *a_enum, nk_bf16_t const
|
|
|
118
118
|
#endif
|
|
119
119
|
|
|
120
120
|
#endif // NK_TARGET_SVEBFDOT
|
|
121
|
-
#endif //
|
|
121
|
+
#endif // NK_TARGET_ARM64_
|
|
122
122
|
#endif // NK_SPATIAL_SVEBFDOT_H
|
|
@@ -28,7 +28,7 @@
|
|
|
28
28
|
#ifndef NK_SPATIAL_SVEHALF_H
|
|
29
29
|
#define NK_SPATIAL_SVEHALF_H
|
|
30
30
|
|
|
31
|
-
#if
|
|
31
|
+
#if NK_TARGET_ARM64_
|
|
32
32
|
#if NK_TARGET_SVEHALF
|
|
33
33
|
|
|
34
34
|
#include "numkong/types.h"
|
|
@@ -131,5 +131,5 @@ NK_PUBLIC void nk_angular_f16_svehalf(nk_f16_t const *a_enum, nk_f16_t const *b_
|
|
|
131
131
|
#endif
|
|
132
132
|
|
|
133
133
|
#endif // NK_TARGET_SVEHALF
|
|
134
|
-
#endif //
|
|
134
|
+
#endif // NK_TARGET_ARM64_
|
|
135
135
|
#endif // NK_SPATIAL_SVEHALF_H
|
|
@@ -30,7 +30,7 @@
|
|
|
30
30
|
#ifndef NK_SPATIAL_SVESDOT_H
|
|
31
31
|
#define NK_SPATIAL_SVESDOT_H
|
|
32
32
|
|
|
33
|
-
#if
|
|
33
|
+
#if NK_TARGET_ARM64_
|
|
34
34
|
#if NK_TARGET_SVESDOT
|
|
35
35
|
|
|
36
36
|
#include "numkong/types.h"
|
|
@@ -138,5 +138,5 @@ NK_PUBLIC void nk_angular_u8_svesdot(nk_u8_t const *a, nk_u8_t const *b, nk_size
|
|
|
138
138
|
#endif
|
|
139
139
|
|
|
140
140
|
#endif // NK_TARGET_SVESDOT
|
|
141
|
-
#endif //
|
|
141
|
+
#endif // NK_TARGET_ARM64_
|
|
142
142
|
#endif // NK_SPATIAL_SVESDOT_H
|
|
@@ -9,7 +9,7 @@
|
|
|
9
9
|
#ifndef NK_SPATIALS_ALDER_H
|
|
10
10
|
#define NK_SPATIALS_ALDER_H
|
|
11
11
|
|
|
12
|
-
#if
|
|
12
|
+
#if NK_TARGET_X8664_
|
|
13
13
|
#if NK_TARGET_ALDER
|
|
14
14
|
|
|
15
15
|
#include "numkong/spatial/haswell.h"
|
|
@@ -90,5 +90,5 @@ nk_define_cross_normalized_symmetric_(euclidean, e2m3, alder, e2m3, f32, /*norm_
|
|
|
90
90
|
#endif
|
|
91
91
|
|
|
92
92
|
#endif // NK_TARGET_ALDER
|
|
93
|
-
#endif //
|
|
93
|
+
#endif // NK_TARGET_X8664_
|
|
94
94
|
#endif // NK_SPATIALS_ALDER_H
|
|
@@ -12,7 +12,7 @@
|
|
|
12
12
|
#ifndef NK_SPATIALS_DIAMOND_H
|
|
13
13
|
#define NK_SPATIALS_DIAMOND_H
|
|
14
14
|
|
|
15
|
-
#if
|
|
15
|
+
#if NK_TARGET_X8664_
|
|
16
16
|
#if NK_TARGET_DIAMOND
|
|
17
17
|
|
|
18
18
|
#include "numkong/dots/diamond.h"
|
|
@@ -78,5 +78,5 @@ nk_define_cross_normalized_symmetric_(euclidean, e5m2, diamond, e5m2, f32, /*nor
|
|
|
78
78
|
#endif
|
|
79
79
|
|
|
80
80
|
#endif // NK_TARGET_DIAMOND
|
|
81
|
-
#endif //
|
|
81
|
+
#endif // NK_TARGET_X8664_
|
|
82
82
|
#endif // NK_SPATIALS_DIAMOND_H
|
|
@@ -9,7 +9,7 @@
|
|
|
9
9
|
#ifndef NK_SPATIALS_GENOA_H
|
|
10
10
|
#define NK_SPATIALS_GENOA_H
|
|
11
11
|
|
|
12
|
-
#if
|
|
12
|
+
#if NK_TARGET_X8664_
|
|
13
13
|
#if NK_TARGET_GENOA
|
|
14
14
|
|
|
15
15
|
#include "numkong/spatial/haswell.h"
|
|
@@ -90,5 +90,5 @@ nk_define_cross_normalized_symmetric_(euclidean, e5m2, genoa, e5m2, f32, /*norm_
|
|
|
90
90
|
#endif
|
|
91
91
|
|
|
92
92
|
#endif // NK_TARGET_GENOA
|
|
93
|
-
#endif //
|
|
93
|
+
#endif // NK_TARGET_X8664_
|
|
94
94
|
#endif // NK_SPATIALS_GENOA_H
|
|
@@ -9,7 +9,7 @@
|
|
|
9
9
|
#ifndef NK_SPATIALS_HASWELL_H
|
|
10
10
|
#define NK_SPATIALS_HASWELL_H
|
|
11
11
|
|
|
12
|
-
#if
|
|
12
|
+
#if NK_TARGET_X8664_
|
|
13
13
|
#if NK_TARGET_HASWELL
|
|
14
14
|
|
|
15
15
|
#include "numkong/spatial/haswell.h"
|
|
@@ -215,5 +215,5 @@ nk_define_cross_normalized_symmetric_(euclidean, u8, haswell, u8, u32, /*norm_va
|
|
|
215
215
|
#endif
|
|
216
216
|
|
|
217
217
|
#endif // NK_TARGET_HASWELL
|
|
218
|
-
#endif //
|
|
218
|
+
#endif // NK_TARGET_X8664_
|
|
219
219
|
#endif // NK_SPATIALS_HASWELL_H
|
|
@@ -9,7 +9,7 @@
|
|
|
9
9
|
#ifndef NK_SPATIALS_ICELAKE_H
|
|
10
10
|
#define NK_SPATIALS_ICELAKE_H
|
|
11
11
|
|
|
12
|
-
#if
|
|
12
|
+
#if NK_TARGET_X8664_
|
|
13
13
|
#if NK_TARGET_ICELAKE
|
|
14
14
|
|
|
15
15
|
#include "numkong/spatial/haswell.h"
|
|
@@ -109,5 +109,5 @@ nk_define_cross_normalized_symmetric_(euclidean, u4, icelake, u4x2, u32, /*norm_
|
|
|
109
109
|
#endif
|
|
110
110
|
|
|
111
111
|
#endif // NK_TARGET_ICELAKE
|
|
112
|
-
#endif //
|
|
112
|
+
#endif // NK_TARGET_X8664_
|
|
113
113
|
#endif // NK_SPATIALS_ICELAKE_H
|
|
@@ -9,7 +9,7 @@
|
|
|
9
9
|
#ifndef NK_SPATIALS_LOONGSONASX_H
|
|
10
10
|
#define NK_SPATIALS_LOONGSONASX_H
|
|
11
11
|
|
|
12
|
-
#if
|
|
12
|
+
#if NK_TARGET_LOONGARCH64_
|
|
13
13
|
#if NK_TARGET_LOONGSONASX
|
|
14
14
|
|
|
15
15
|
#include "numkong/spatial/loongsonasx.h"
|
|
@@ -149,5 +149,5 @@ nk_define_cross_normalized_symmetric_(euclidean, f16, loongsonasx, f16, f32, /*n
|
|
|
149
149
|
#endif
|
|
150
150
|
|
|
151
151
|
#endif // NK_TARGET_LOONGSONASX
|
|
152
|
-
#endif //
|
|
152
|
+
#endif // NK_TARGET_LOONGARCH64_
|
|
153
153
|
#endif // NK_SPATIALS_LOONGSONASX_H
|
|
@@ -9,7 +9,7 @@
|
|
|
9
9
|
#ifndef NK_SPATIALS_NEON_H
|
|
10
10
|
#define NK_SPATIALS_NEON_H
|
|
11
11
|
|
|
12
|
-
#if
|
|
12
|
+
#if NK_TARGET_ARM64_
|
|
13
13
|
#if NK_TARGET_NEON
|
|
14
14
|
|
|
15
15
|
#include "numkong/spatial/neon.h"
|
|
@@ -105,5 +105,5 @@ nk_define_cross_normalized_symmetric_(euclidean, f64, neon, f64, f64, /*norm_val
|
|
|
105
105
|
#endif
|
|
106
106
|
|
|
107
107
|
#endif // NK_TARGET_NEON
|
|
108
|
-
#endif //
|
|
108
|
+
#endif // NK_TARGET_ARM64_
|
|
109
109
|
#endif // NK_SPATIALS_NEON_H
|
|
@@ -9,7 +9,7 @@
|
|
|
9
9
|
#ifndef NK_SPATIALS_NEONBFDOT_H
|
|
10
10
|
#define NK_SPATIALS_NEONBFDOT_H
|
|
11
11
|
|
|
12
|
-
#if
|
|
12
|
+
#if NK_TARGET_ARM64_
|
|
13
13
|
#if NK_TARGET_NEONBFDOT
|
|
14
14
|
|
|
15
15
|
#include "numkong/spatial/neon.h"
|
|
@@ -56,5 +56,5 @@ nk_define_cross_normalized_symmetric_(euclidean, bf16, neonbfdot, bf16, f32, /*n
|
|
|
56
56
|
#endif
|
|
57
57
|
|
|
58
58
|
#endif // NK_TARGET_NEONBFDOT
|
|
59
|
-
#endif //
|
|
59
|
+
#endif // NK_TARGET_ARM64_
|
|
60
60
|
#endif // NK_SPATIALS_NEONBFDOT_H
|
|
@@ -9,7 +9,7 @@
|
|
|
9
9
|
#ifndef NK_SPATIALS_NEONFHM_H
|
|
10
10
|
#define NK_SPATIALS_NEONFHM_H
|
|
11
11
|
|
|
12
|
-
#if
|
|
12
|
+
#if NK_TARGET_ARM64_
|
|
13
13
|
#if NK_TARGET_NEONFHM
|
|
14
14
|
|
|
15
15
|
#include "numkong/spatial/neon.h"
|
|
@@ -88,5 +88,5 @@ nk_define_cross_normalized_symmetric_(euclidean, e5m2, neonfhm, e5m2, f32, /*nor
|
|
|
88
88
|
#endif
|
|
89
89
|
|
|
90
90
|
#endif // NK_TARGET_NEONFHM
|
|
91
|
-
#endif //
|
|
91
|
+
#endif // NK_TARGET_ARM64_
|
|
92
92
|
#endif // NK_SPATIALS_NEONFHM_H
|
|
@@ -12,7 +12,7 @@
|
|
|
12
12
|
#ifndef NK_SPATIALS_NEONFP8_H
|
|
13
13
|
#define NK_SPATIALS_NEONFP8_H
|
|
14
14
|
|
|
15
|
-
#if
|
|
15
|
+
#if NK_TARGET_ARM64_
|
|
16
16
|
#if NK_TARGET_NEONFP8
|
|
17
17
|
|
|
18
18
|
#include "numkong/dots/neonfp8.h"
|
|
@@ -107,5 +107,5 @@ nk_define_cross_normalized_symmetric_(euclidean, e3m2, neonfp8, e3m2, f32, f32,
|
|
|
107
107
|
#endif
|
|
108
108
|
|
|
109
109
|
#endif // NK_TARGET_NEONFP8
|
|
110
|
-
#endif //
|
|
110
|
+
#endif // NK_TARGET_ARM64_
|
|
111
111
|
#endif // NK_SPATIALS_NEONFP8_H
|
|
@@ -9,7 +9,7 @@
|
|
|
9
9
|
#ifndef NK_SPATIALS_NEONSDOT_H
|
|
10
10
|
#define NK_SPATIALS_NEONSDOT_H
|
|
11
11
|
|
|
12
|
-
#if
|
|
12
|
+
#if NK_TARGET_ARM64_
|
|
13
13
|
#if NK_TARGET_NEONSDOT
|
|
14
14
|
|
|
15
15
|
#include "numkong/spatial/neon.h"
|
|
@@ -139,5 +139,5 @@ nk_define_cross_normalized_symmetric_(euclidean, e3m2, neonsdot, e3m2, f32, /*no
|
|
|
139
139
|
#endif
|
|
140
140
|
|
|
141
141
|
#endif // NK_TARGET_NEONSDOT
|
|
142
|
-
#endif //
|
|
142
|
+
#endif // NK_TARGET_ARM64_
|
|
143
143
|
#endif // NK_SPATIALS_NEONSDOT_H
|
|
@@ -9,7 +9,7 @@
|
|
|
9
9
|
#ifndef NK_SPATIALS_POWERVSX_H
|
|
10
10
|
#define NK_SPATIALS_POWERVSX_H
|
|
11
11
|
|
|
12
|
-
#if
|
|
12
|
+
#if NK_TARGET_POWER64_
|
|
13
13
|
#if NK_TARGET_POWERVSX
|
|
14
14
|
|
|
15
15
|
#include "numkong/spatial/powervsx.h"
|
|
@@ -149,5 +149,5 @@ nk_define_cross_normalized_symmetric_(euclidean, f64, powervsx, f64, f64, /*norm
|
|
|
149
149
|
#endif
|
|
150
150
|
|
|
151
151
|
#endif // NK_TARGET_POWERVSX
|
|
152
|
-
#endif //
|
|
152
|
+
#endif // NK_TARGET_POWER64_
|
|
153
153
|
#endif // NK_SPATIALS_POWERVSX_H
|
|
@@ -9,7 +9,7 @@
|
|
|
9
9
|
#ifndef NK_SPATIALS_RVV_H
|
|
10
10
|
#define NK_SPATIALS_RVV_H
|
|
11
11
|
|
|
12
|
-
#if
|
|
12
|
+
#if NK_TARGET_RISCV64_
|
|
13
13
|
#if NK_TARGET_RVV
|
|
14
14
|
|
|
15
15
|
#include "numkong/dots/serial.h"
|
|
@@ -1972,5 +1972,5 @@ NK_PUBLIC void nk_euclideans_symmetric_u8_rvv(
|
|
|
1972
1972
|
#endif
|
|
1973
1973
|
|
|
1974
1974
|
#endif // NK_TARGET_RVV
|
|
1975
|
-
#endif //
|
|
1975
|
+
#endif // NK_TARGET_RISCV64_
|
|
1976
1976
|
#endif // NK_SPATIALS_RVV_H
|
|
@@ -9,7 +9,7 @@
|
|
|
9
9
|
#ifndef NK_SPATIALS_SAPPHIREAMX_H
|
|
10
10
|
#define NK_SPATIALS_SAPPHIREAMX_H
|
|
11
11
|
|
|
12
|
-
#if
|
|
12
|
+
#if NK_TARGET_X8664_
|
|
13
13
|
#if NK_TARGET_SAPPHIREAMX
|
|
14
14
|
|
|
15
15
|
#include "numkong/spatial/skylake.h"
|
|
@@ -1145,5 +1145,5 @@ NK_PUBLIC void nk_euclideans_symmetric_e3m2_sapphireamx(
|
|
|
1145
1145
|
#endif
|
|
1146
1146
|
|
|
1147
1147
|
#endif // NK_TARGET_SAPPHIREAMX
|
|
1148
|
-
#endif //
|
|
1148
|
+
#endif // NK_TARGET_X8664_
|
|
1149
1149
|
#endif // NK_SPATIALS_SAPPHIREAMX_H
|
|
@@ -9,7 +9,7 @@
|
|
|
9
9
|
#ifndef NK_SPATIALS_SIERRA_H
|
|
10
10
|
#define NK_SPATIALS_SIERRA_H
|
|
11
11
|
|
|
12
|
-
#if
|
|
12
|
+
#if NK_TARGET_X8664_
|
|
13
13
|
#if NK_TARGET_SIERRA
|
|
14
14
|
|
|
15
15
|
#include "numkong/spatial/haswell.h"
|
|
@@ -92,5 +92,5 @@ nk_define_cross_normalized_symmetric_(euclidean, e2m3, sierra, e2m3, f32, /*norm
|
|
|
92
92
|
#endif
|
|
93
93
|
|
|
94
94
|
#endif // NK_TARGET_SIERRA
|
|
95
|
-
#endif //
|
|
95
|
+
#endif // NK_TARGET_X8664_
|
|
96
96
|
#endif // NK_SPATIALS_SIERRA_H
|
|
@@ -9,7 +9,7 @@
|
|
|
9
9
|
#ifndef NK_SPATIALS_SKYLAKE_H
|
|
10
10
|
#define NK_SPATIALS_SKYLAKE_H
|
|
11
11
|
|
|
12
|
-
#if
|
|
12
|
+
#if NK_TARGET_X8664_
|
|
13
13
|
#if NK_TARGET_SKYLAKE
|
|
14
14
|
|
|
15
15
|
#include "numkong/spatial/skylake.h"
|
|
@@ -180,5 +180,5 @@ nk_define_cross_normalized_symmetric_(euclidean, e3m2, skylake, e3m2, f32, /*nor
|
|
|
180
180
|
#endif
|
|
181
181
|
|
|
182
182
|
#endif // NK_TARGET_SKYLAKE
|
|
183
|
-
#endif //
|
|
183
|
+
#endif // NK_TARGET_X8664_
|
|
184
184
|
#endif // NK_SPATIALS_SKYLAKE_H
|
|
@@ -9,7 +9,7 @@
|
|
|
9
9
|
#ifndef NK_SPATIALS_SME_H
|
|
10
10
|
#define NK_SPATIALS_SME_H
|
|
11
11
|
|
|
12
|
-
#if
|
|
12
|
+
#if NK_TARGET_ARM64_
|
|
13
13
|
#if NK_TARGET_SME
|
|
14
14
|
|
|
15
15
|
#include "numkong/dots/serial.h"
|
|
@@ -1882,5 +1882,5 @@ NK_PUBLIC void nk_euclideans_symmetric_u4_sme(
|
|
|
1882
1882
|
#endif
|
|
1883
1883
|
|
|
1884
1884
|
#endif // NK_TARGET_SME
|
|
1885
|
-
#endif //
|
|
1885
|
+
#endif // NK_TARGET_ARM64_
|
|
1886
1886
|
#endif // NK_SPATIALS_SME_H
|
|
@@ -9,7 +9,7 @@
|
|
|
9
9
|
#ifndef NK_SPATIALS_SMEF64_H
|
|
10
10
|
#define NK_SPATIALS_SMEF64_H
|
|
11
11
|
|
|
12
|
-
#if
|
|
12
|
+
#if NK_TARGET_ARM64_
|
|
13
13
|
#if NK_TARGET_SME
|
|
14
14
|
|
|
15
15
|
#include "numkong/dots/serial.h"
|
|
@@ -470,5 +470,5 @@ NK_PUBLIC void nk_euclideans_symmetric_f64_smef64(
|
|
|
470
470
|
#endif
|
|
471
471
|
|
|
472
472
|
#endif // NK_TARGET_SME
|
|
473
|
-
#endif //
|
|
473
|
+
#endif // NK_TARGET_ARM64_
|
|
474
474
|
#endif // NK_SPATIALS_SMEF64_H
|
|
@@ -23,7 +23,7 @@
|
|
|
23
23
|
#ifndef NK_TRIGONOMETRY_HASWELL_H
|
|
24
24
|
#define NK_TRIGONOMETRY_HASWELL_H
|
|
25
25
|
|
|
26
|
-
#if
|
|
26
|
+
#if NK_TARGET_X8664_
|
|
27
27
|
#if NK_TARGET_HASWELL
|
|
28
28
|
|
|
29
29
|
#include "numkong/types.h"
|
|
@@ -649,5 +649,5 @@ NK_PUBLIC void nk_each_atan_f64_haswell(nk_f64_t const *ins, nk_size_t n, nk_f64
|
|
|
649
649
|
#endif
|
|
650
650
|
|
|
651
651
|
#endif // NK_TARGET_HASWELL
|
|
652
|
-
#endif //
|
|
652
|
+
#endif // NK_TARGET_X8664_
|
|
653
653
|
#endif // NK_TRIGONOMETRY_HASWELL_H
|