numkong 7.4.2 → 7.4.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +84 -84
- package/c/numkong.c +1 -1
- package/include/numkong/attention/sapphireamx.h +2 -2
- package/include/numkong/attention/sme.h +2 -2
- package/include/numkong/capabilities.h +47 -47
- package/include/numkong/cast/diamond.h +2 -2
- package/include/numkong/cast/haswell.h +2 -2
- package/include/numkong/cast/icelake.h +2 -2
- package/include/numkong/cast/loongsonasx.h +2 -2
- package/include/numkong/cast/neon.h +2 -2
- package/include/numkong/cast/powervsx.h +2 -2
- package/include/numkong/cast/rvv.h +2 -2
- package/include/numkong/cast/sapphire.h +2 -2
- package/include/numkong/cast/skylake.h +2 -2
- package/include/numkong/curved/genoa.h +2 -2
- package/include/numkong/curved/haswell.h +2 -2
- package/include/numkong/curved/neon.h +2 -2
- package/include/numkong/curved/neonbfdot.h +2 -2
- package/include/numkong/curved/rvv.h +2 -2
- package/include/numkong/curved/skylake.h +2 -2
- package/include/numkong/curved/smef64.h +2 -2
- package/include/numkong/dot/alder.h +2 -2
- package/include/numkong/dot/diamond.h +2 -2
- package/include/numkong/dot/genoa.h +2 -2
- package/include/numkong/dot/haswell.h +2 -2
- package/include/numkong/dot/icelake.h +2 -2
- package/include/numkong/dot/loongsonasx.h +2 -2
- package/include/numkong/dot/neon.h +2 -2
- package/include/numkong/dot/neonbfdot.h +2 -2
- package/include/numkong/dot/neonfhm.h +2 -2
- package/include/numkong/dot/neonfp8.h +2 -2
- package/include/numkong/dot/neonsdot.h +2 -2
- package/include/numkong/dot/rvv.h +2 -2
- package/include/numkong/dot/rvvbb.h +2 -2
- package/include/numkong/dot/rvvbf16.h +2 -2
- package/include/numkong/dot/rvvhalf.h +2 -2
- package/include/numkong/dot/sapphire.h +2 -2
- package/include/numkong/dot/sierra.h +2 -2
- package/include/numkong/dot/skylake.h +2 -2
- package/include/numkong/dot/sve.h +2 -2
- package/include/numkong/dot/svebfdot.h +2 -2
- package/include/numkong/dot/svehalf.h +2 -2
- package/include/numkong/dot/svesdot.h +2 -2
- package/include/numkong/dots/alder.h +2 -2
- package/include/numkong/dots/diamond.h +2 -2
- package/include/numkong/dots/genoa.h +2 -2
- package/include/numkong/dots/haswell.h +2 -2
- package/include/numkong/dots/icelake.h +2 -2
- package/include/numkong/dots/loongsonasx.h +2 -2
- package/include/numkong/dots/neon.h +2 -2
- package/include/numkong/dots/neonbfdot.h +2 -2
- package/include/numkong/dots/neonfhm.h +2 -2
- package/include/numkong/dots/neonfp8.h +2 -2
- package/include/numkong/dots/neonsdot.h +2 -2
- package/include/numkong/dots/powervsx.h +2 -2
- package/include/numkong/dots/rvv.h +2 -2
- package/include/numkong/dots/sapphireamx.h +2 -2
- package/include/numkong/dots/sierra.h +2 -2
- package/include/numkong/dots/skylake.h +2 -2
- package/include/numkong/dots/sme.h +10 -10
- package/include/numkong/dots/smebi32.h +2 -2
- package/include/numkong/dots/smef64.h +2 -2
- package/include/numkong/dots/smehalf.h +2 -2
- package/include/numkong/each/haswell.h +2 -2
- package/include/numkong/each/icelake.h +2 -2
- package/include/numkong/each/neon.h +2 -2
- package/include/numkong/each/neonbfdot.h +2 -2
- package/include/numkong/each/neonhalf.h +2 -2
- package/include/numkong/each/rvv.h +2 -2
- package/include/numkong/each/sapphire.h +2 -2
- package/include/numkong/each/skylake.h +2 -2
- package/include/numkong/geospatial/haswell.h +2 -2
- package/include/numkong/geospatial/neon.h +2 -2
- package/include/numkong/geospatial/rvv.h +2 -2
- package/include/numkong/geospatial/skylake.h +2 -2
- package/include/numkong/maxsim/alder.h +2 -2
- package/include/numkong/maxsim/genoa.h +2 -2
- package/include/numkong/maxsim/haswell.h +2 -2
- package/include/numkong/maxsim/icelake.h +2 -2
- package/include/numkong/maxsim/neonsdot.h +2 -2
- package/include/numkong/maxsim/sapphireamx.h +2 -2
- package/include/numkong/maxsim/sme.h +2 -2
- package/include/numkong/mesh/haswell.h +2 -2
- package/include/numkong/mesh/neon.h +2 -2
- package/include/numkong/mesh/neonbfdot.h +2 -2
- package/include/numkong/mesh/rvv.h +2 -2
- package/include/numkong/mesh/skylake.h +2 -2
- package/include/numkong/numkong.h +1 -1
- package/include/numkong/probability/haswell.h +2 -2
- package/include/numkong/probability/neon.h +2 -2
- package/include/numkong/probability/rvv.h +2 -2
- package/include/numkong/probability/skylake.h +2 -2
- package/include/numkong/reduce/alder.h +2 -2
- package/include/numkong/reduce/genoa.h +2 -2
- package/include/numkong/reduce/haswell.h +2 -2
- package/include/numkong/reduce/icelake.h +2 -2
- package/include/numkong/reduce/neon.h +2 -2
- package/include/numkong/reduce/neonbfdot.h +2 -2
- package/include/numkong/reduce/neonfhm.h +2 -2
- package/include/numkong/reduce/neonsdot.h +2 -2
- package/include/numkong/reduce/rvv.h +2 -2
- package/include/numkong/reduce/sierra.h +2 -2
- package/include/numkong/reduce/skylake.h +2 -2
- package/include/numkong/scalar/haswell.h +2 -2
- package/include/numkong/scalar/loongsonasx.h +2 -2
- package/include/numkong/scalar/neon.h +2 -2
- package/include/numkong/scalar/neonhalf.h +2 -2
- package/include/numkong/scalar/powervsx.h +2 -2
- package/include/numkong/scalar/rvv.h +2 -2
- package/include/numkong/scalar/sapphire.h +2 -2
- package/include/numkong/set/haswell.h +2 -2
- package/include/numkong/set/icelake.h +2 -2
- package/include/numkong/set/loongsonasx.h +2 -2
- package/include/numkong/set/neon.h +2 -2
- package/include/numkong/set/powervsx.h +2 -2
- package/include/numkong/set/rvv.h +2 -2
- package/include/numkong/set/rvvbb.h +2 -2
- package/include/numkong/set/sve.h +2 -2
- package/include/numkong/sets/haswell.h +2 -2
- package/include/numkong/sets/icelake.h +2 -2
- package/include/numkong/sets/loongsonasx.h +2 -2
- package/include/numkong/sets/neon.h +2 -2
- package/include/numkong/sets/powervsx.h +2 -2
- package/include/numkong/sets/smebi32.h +2 -2
- package/include/numkong/sparse/icelake.h +2 -2
- package/include/numkong/sparse/neon.h +2 -2
- package/include/numkong/sparse/sve2.h +2 -2
- package/include/numkong/sparse/turin.h +2 -2
- package/include/numkong/spatial/alder.h +2 -2
- package/include/numkong/spatial/diamond.h +2 -2
- package/include/numkong/spatial/genoa.h +2 -2
- package/include/numkong/spatial/haswell.h +2 -2
- package/include/numkong/spatial/icelake.h +2 -2
- package/include/numkong/spatial/loongsonasx.h +2 -2
- package/include/numkong/spatial/neon.h +2 -2
- package/include/numkong/spatial/neonbfdot.h +2 -2
- package/include/numkong/spatial/neonfp8.h +2 -2
- package/include/numkong/spatial/neonsdot.h +2 -2
- package/include/numkong/spatial/powervsx.h +2 -2
- package/include/numkong/spatial/rvv.h +2 -2
- package/include/numkong/spatial/rvvbf16.h +2 -2
- package/include/numkong/spatial/rvvhalf.h +2 -2
- package/include/numkong/spatial/sierra.h +2 -2
- package/include/numkong/spatial/skylake.h +2 -2
- package/include/numkong/spatial/sve.h +2 -2
- package/include/numkong/spatial/svebfdot.h +2 -2
- package/include/numkong/spatial/svehalf.h +2 -2
- package/include/numkong/spatial/svesdot.h +2 -2
- package/include/numkong/spatials/alder.h +2 -2
- package/include/numkong/spatials/diamond.h +2 -2
- package/include/numkong/spatials/genoa.h +2 -2
- package/include/numkong/spatials/haswell.h +2 -2
- package/include/numkong/spatials/icelake.h +2 -2
- package/include/numkong/spatials/loongsonasx.h +2 -2
- package/include/numkong/spatials/neon.h +2 -2
- package/include/numkong/spatials/neonbfdot.h +2 -2
- package/include/numkong/spatials/neonfhm.h +2 -2
- package/include/numkong/spatials/neonfp8.h +2 -2
- package/include/numkong/spatials/neonsdot.h +2 -2
- package/include/numkong/spatials/powervsx.h +2 -2
- package/include/numkong/spatials/rvv.h +2 -2
- package/include/numkong/spatials/sapphireamx.h +2 -2
- package/include/numkong/spatials/sierra.h +2 -2
- package/include/numkong/spatials/skylake.h +2 -2
- package/include/numkong/spatials/sme.h +2 -2
- package/include/numkong/spatials/smef64.h +2 -2
- package/include/numkong/trigonometry/haswell.h +2 -2
- package/include/numkong/trigonometry/neon.h +2 -2
- package/include/numkong/trigonometry/rvv.h +2 -2
- package/include/numkong/trigonometry/skylake.h +2 -2
- package/include/numkong/types.h +88 -80
- package/package.json +7 -7
|
@@ -66,7 +66,7 @@
|
|
|
66
66
|
#ifndef NK_CAST_POWERVSX_H
|
|
67
67
|
#define NK_CAST_POWERVSX_H
|
|
68
68
|
|
|
69
|
-
#if
|
|
69
|
+
#if NK_TARGET_POWER64_
|
|
70
70
|
#if NK_TARGET_POWERVSX
|
|
71
71
|
|
|
72
72
|
#include "numkong/types.h"
|
|
@@ -445,5 +445,5 @@ NK_PUBLIC void nk_cast_powervsx(void const *from, nk_dtype_t from_type, nk_size_
|
|
|
445
445
|
#endif
|
|
446
446
|
|
|
447
447
|
#endif // NK_TARGET_POWERVSX
|
|
448
|
-
#endif //
|
|
448
|
+
#endif // NK_TARGET_POWER64_
|
|
449
449
|
#endif // NK_CAST_POWERVSX_H
|
|
@@ -37,7 +37,7 @@
|
|
|
37
37
|
#ifndef NK_CAST_RVV_H
|
|
38
38
|
#define NK_CAST_RVV_H
|
|
39
39
|
|
|
40
|
-
#if
|
|
40
|
+
#if NK_TARGET_RISCV64_
|
|
41
41
|
#if NK_TARGET_RVV
|
|
42
42
|
|
|
43
43
|
#include "numkong/types.h"
|
|
@@ -966,5 +966,5 @@ NK_PUBLIC void nk_cast_rvv(void const *from, nk_dtype_t from_type, nk_size_t cou
|
|
|
966
966
|
#endif
|
|
967
967
|
|
|
968
968
|
#endif // NK_TARGET_RVV
|
|
969
|
-
#endif //
|
|
969
|
+
#endif // NK_TARGET_RISCV64_
|
|
970
970
|
#endif // NK_CAST_RVV_H
|
|
@@ -22,7 +22,7 @@
|
|
|
22
22
|
#ifndef NK_CAST_SAPPHIRE_H
|
|
23
23
|
#define NK_CAST_SAPPHIRE_H
|
|
24
24
|
|
|
25
|
-
#if
|
|
25
|
+
#if NK_TARGET_X8664_
|
|
26
26
|
#if NK_TARGET_SAPPHIRE
|
|
27
27
|
|
|
28
28
|
#include "numkong/types.h"
|
|
@@ -258,5 +258,5 @@ NK_PUBLIC void nk_cast_sapphire(void const *from, nk_dtype_t from_type, nk_size_
|
|
|
258
258
|
#endif
|
|
259
259
|
|
|
260
260
|
#endif // NK_TARGET_SAPPHIRE
|
|
261
|
-
#endif //
|
|
261
|
+
#endif // NK_TARGET_X8664_
|
|
262
262
|
#endif // NK_CAST_SAPPHIRE_H
|
|
@@ -23,7 +23,7 @@
|
|
|
23
23
|
#ifndef NK_CAST_SKYLAKE_H
|
|
24
24
|
#define NK_CAST_SKYLAKE_H
|
|
25
25
|
|
|
26
|
-
#if
|
|
26
|
+
#if NK_TARGET_X8664_
|
|
27
27
|
#if NK_TARGET_SKYLAKE
|
|
28
28
|
|
|
29
29
|
#include "numkong/types.h"
|
|
@@ -911,5 +911,5 @@ NK_PUBLIC void nk_cast_skylake(void const *from, nk_dtype_t from_type, nk_size_t
|
|
|
911
911
|
#endif
|
|
912
912
|
|
|
913
913
|
#endif // NK_TARGET_SKYLAKE
|
|
914
|
-
#endif //
|
|
914
|
+
#endif // NK_TARGET_X8664_
|
|
915
915
|
#endif // NK_CAST_SKYLAKE_H
|
|
@@ -11,7 +11,7 @@
|
|
|
11
11
|
#ifndef NK_CURVED_GENOA_H
|
|
12
12
|
#define NK_CURVED_GENOA_H
|
|
13
13
|
|
|
14
|
-
#if
|
|
14
|
+
#if NK_TARGET_X8664_
|
|
15
15
|
#if NK_TARGET_GENOA
|
|
16
16
|
|
|
17
17
|
#include "numkong/types.h"
|
|
@@ -178,5 +178,5 @@ NK_PUBLIC void nk_bilinear_bf16c_genoa(nk_bf16c_t const *a, nk_bf16c_t const *b,
|
|
|
178
178
|
#endif
|
|
179
179
|
|
|
180
180
|
#endif // NK_TARGET_GENOA
|
|
181
|
-
#endif //
|
|
181
|
+
#endif // NK_TARGET_X8664_
|
|
182
182
|
#endif // NK_CURVED_GENOA_H
|
|
@@ -11,7 +11,7 @@
|
|
|
11
11
|
#ifndef NK_CURVED_HASWELL_H
|
|
12
12
|
#define NK_CURVED_HASWELL_H
|
|
13
13
|
|
|
14
|
-
#if
|
|
14
|
+
#if NK_TARGET_X8664_
|
|
15
15
|
#if NK_TARGET_HASWELL
|
|
16
16
|
|
|
17
17
|
#include "numkong/types.h"
|
|
@@ -272,5 +272,5 @@ NK_PUBLIC void nk_mahalanobis_bf16_haswell(nk_bf16_t const *a, nk_bf16_t const *
|
|
|
272
272
|
#endif
|
|
273
273
|
|
|
274
274
|
#endif // NK_TARGET_HASWELL
|
|
275
|
-
#endif //
|
|
275
|
+
#endif // NK_TARGET_X8664_
|
|
276
276
|
#endif // NK_CURVED_HASWELL_H
|
|
@@ -24,7 +24,7 @@
|
|
|
24
24
|
#ifndef NK_CURVED_NEON_H
|
|
25
25
|
#define NK_CURVED_NEON_H
|
|
26
26
|
|
|
27
|
-
#if
|
|
27
|
+
#if NK_TARGET_ARM64_
|
|
28
28
|
#if NK_TARGET_NEON
|
|
29
29
|
|
|
30
30
|
#include "numkong/types.h"
|
|
@@ -325,5 +325,5 @@ NK_PUBLIC void nk_bilinear_f16c_neon(nk_f16c_t const *a_pairs, nk_f16c_t const *
|
|
|
325
325
|
#endif
|
|
326
326
|
|
|
327
327
|
#endif // NK_TARGET_NEON
|
|
328
|
-
#endif //
|
|
328
|
+
#endif // NK_TARGET_ARM64_
|
|
329
329
|
#endif // NK_CURVED_NEON_H
|
|
@@ -24,7 +24,7 @@
|
|
|
24
24
|
#ifndef NK_CURVED_NEONBFDOT_H
|
|
25
25
|
#define NK_CURVED_NEONBFDOT_H
|
|
26
26
|
|
|
27
|
-
#if
|
|
27
|
+
#if NK_TARGET_ARM64_
|
|
28
28
|
#if NK_TARGET_NEONBFDOT
|
|
29
29
|
|
|
30
30
|
#include "numkong/types.h" // `nk_bf16_t`
|
|
@@ -207,5 +207,5 @@ NK_PUBLIC void nk_bilinear_bf16c_neonbfdot(nk_bf16c_t const *a_pairs, nk_bf16c_t
|
|
|
207
207
|
#endif
|
|
208
208
|
|
|
209
209
|
#endif // NK_TARGET_NEONBFDOT
|
|
210
|
-
#endif //
|
|
210
|
+
#endif // NK_TARGET_ARM64_
|
|
211
211
|
#endif // NK_CURVED_NEONBFDOT_H
|
|
@@ -15,7 +15,7 @@
|
|
|
15
15
|
#ifndef NK_CURVED_RVV_H
|
|
16
16
|
#define NK_CURVED_RVV_H
|
|
17
17
|
|
|
18
|
-
#if
|
|
18
|
+
#if NK_TARGET_RISCV64_
|
|
19
19
|
#if NK_TARGET_RVV
|
|
20
20
|
|
|
21
21
|
#include "numkong/types.h"
|
|
@@ -301,5 +301,5 @@ NK_PUBLIC void nk_mahalanobis_bf16_rvv(nk_bf16_t const *a, nk_bf16_t const *b, n
|
|
|
301
301
|
#endif
|
|
302
302
|
|
|
303
303
|
#endif // NK_TARGET_RVV
|
|
304
|
-
#endif //
|
|
304
|
+
#endif // NK_TARGET_RISCV64_
|
|
305
305
|
#endif // NK_CURVED_RVV_H
|
|
@@ -13,7 +13,7 @@
|
|
|
13
13
|
#ifndef NK_CURVED_SKYLAKE_H
|
|
14
14
|
#define NK_CURVED_SKYLAKE_H
|
|
15
15
|
|
|
16
|
-
#if
|
|
16
|
+
#if NK_TARGET_X8664_
|
|
17
17
|
#if NK_TARGET_SKYLAKE
|
|
18
18
|
|
|
19
19
|
#include "numkong/types.h"
|
|
@@ -453,5 +453,5 @@ NK_PUBLIC void nk_bilinear_f64c_skylake(nk_f64c_t const *a, nk_f64c_t const *b,
|
|
|
453
453
|
#endif
|
|
454
454
|
|
|
455
455
|
#endif // NK_TARGET_SKYLAKE
|
|
456
|
-
#endif //
|
|
456
|
+
#endif // NK_TARGET_X8664_
|
|
457
457
|
#endif // NK_CURVED_SKYLAKE_H
|
|
@@ -48,7 +48,7 @@
|
|
|
48
48
|
#ifndef NK_CURVED_SMEF64_H
|
|
49
49
|
#define NK_CURVED_SMEF64_H
|
|
50
50
|
|
|
51
|
-
#if
|
|
51
|
+
#if NK_TARGET_ARM64_
|
|
52
52
|
#if NK_TARGET_SMEF64
|
|
53
53
|
|
|
54
54
|
#include "numkong/types.h"
|
|
@@ -506,5 +506,5 @@ NK_PUBLIC void nk_bilinear_f64c_smef64(nk_f64c_t const *a_pairs, nk_f64c_t const
|
|
|
506
506
|
#endif
|
|
507
507
|
|
|
508
508
|
#endif // NK_TARGET_SMEF64
|
|
509
|
-
#endif //
|
|
509
|
+
#endif // NK_TARGET_ARM64_
|
|
510
510
|
#endif // NK_CURVED_SMEF64_H
|
|
@@ -80,7 +80,7 @@
|
|
|
80
80
|
#ifndef NK_DOT_ALDER_H
|
|
81
81
|
#define NK_DOT_ALDER_H
|
|
82
82
|
|
|
83
|
-
#if
|
|
83
|
+
#if NK_TARGET_X8664_
|
|
84
84
|
#if NK_TARGET_ALDER
|
|
85
85
|
|
|
86
86
|
#include "numkong/types.h"
|
|
@@ -559,5 +559,5 @@ NK_INTERNAL void nk_dot_e2m3x32_finalize_alder(
|
|
|
559
559
|
#endif
|
|
560
560
|
|
|
561
561
|
#endif // NK_TARGET_ALDER
|
|
562
|
-
#endif //
|
|
562
|
+
#endif // NK_TARGET_X8664_
|
|
563
563
|
#endif // NK_DOT_ALDER_H
|
|
@@ -27,7 +27,7 @@
|
|
|
27
27
|
#ifndef NK_DOT_DIAMOND_H
|
|
28
28
|
#define NK_DOT_DIAMOND_H
|
|
29
29
|
|
|
30
|
-
#if
|
|
30
|
+
#if NK_TARGET_X8664_
|
|
31
31
|
#if NK_TARGET_DIAMOND
|
|
32
32
|
|
|
33
33
|
#include "numkong/types.h"
|
|
@@ -154,5 +154,5 @@ NK_INTERNAL void nk_dot_through_f16_finalize_diamond_(
|
|
|
154
154
|
#endif
|
|
155
155
|
|
|
156
156
|
#endif // NK_TARGET_DIAMOND
|
|
157
|
-
#endif //
|
|
157
|
+
#endif // NK_TARGET_X8664_
|
|
158
158
|
#endif // NK_DOT_DIAMOND_H
|
|
@@ -76,7 +76,7 @@
|
|
|
76
76
|
#ifndef NK_DOT_GENOA_H
|
|
77
77
|
#define NK_DOT_GENOA_H
|
|
78
78
|
|
|
79
|
-
#if
|
|
79
|
+
#if NK_TARGET_X8664_
|
|
80
80
|
#if NK_TARGET_GENOA
|
|
81
81
|
|
|
82
82
|
#include "numkong/types.h"
|
|
@@ -285,5 +285,5 @@ NK_INTERNAL void nk_dot_bf16x32_finalize_genoa(nk_dot_bf16x32_state_genoa_t cons
|
|
|
285
285
|
#endif
|
|
286
286
|
|
|
287
287
|
#endif // NK_TARGET_GENOA
|
|
288
|
-
#endif //
|
|
288
|
+
#endif // NK_TARGET_X8664_
|
|
289
289
|
#endif // NK_DOT_GENOA_H
|
|
@@ -86,7 +86,7 @@
|
|
|
86
86
|
#ifndef NK_DOT_HASWELL_H
|
|
87
87
|
#define NK_DOT_HASWELL_H
|
|
88
88
|
|
|
89
|
-
#if
|
|
89
|
+
#if NK_TARGET_X8664_
|
|
90
90
|
#if NK_TARGET_HASWELL
|
|
91
91
|
|
|
92
92
|
#include "numkong/types.h"
|
|
@@ -1719,5 +1719,5 @@ NK_INTERNAL void nk_dot_u1x128_finalize_haswell( //
|
|
|
1719
1719
|
#endif
|
|
1720
1720
|
|
|
1721
1721
|
#endif // NK_TARGET_HASWELL
|
|
1722
|
-
#endif //
|
|
1722
|
+
#endif // NK_TARGET_X8664_
|
|
1723
1723
|
#endif // NK_DOT_HASWELL_H
|
|
@@ -75,7 +75,7 @@
|
|
|
75
75
|
#ifndef NK_DOT_ICELAKE_H
|
|
76
76
|
#define NK_DOT_ICELAKE_H
|
|
77
77
|
|
|
78
|
-
#if
|
|
78
|
+
#if NK_TARGET_X8664_
|
|
79
79
|
#if NK_TARGET_ICELAKE
|
|
80
80
|
|
|
81
81
|
#include "numkong/types.h"
|
|
@@ -993,5 +993,5 @@ NK_INTERNAL void nk_dot_u1x512_finalize_icelake( //
|
|
|
993
993
|
#endif
|
|
994
994
|
|
|
995
995
|
#endif // NK_TARGET_ICELAKE
|
|
996
|
-
#endif //
|
|
996
|
+
#endif // NK_TARGET_X8664_
|
|
997
997
|
#endif // NK_DOT_ICELAKE_H
|
|
@@ -25,7 +25,7 @@
|
|
|
25
25
|
#ifndef NK_DOT_LOONGSONASX_H
|
|
26
26
|
#define NK_DOT_LOONGSONASX_H
|
|
27
27
|
|
|
28
|
-
#if
|
|
28
|
+
#if NK_TARGET_LOONGARCH64_
|
|
29
29
|
#if NK_TARGET_LOONGSONASX
|
|
30
30
|
|
|
31
31
|
#include "numkong/types.h"
|
|
@@ -667,5 +667,5 @@ NK_INTERNAL void nk_dot_u1x256_finalize_loongsonasx(
|
|
|
667
667
|
#endif
|
|
668
668
|
|
|
669
669
|
#endif // NK_TARGET_LOONGSONASX
|
|
670
|
-
#endif //
|
|
670
|
+
#endif // NK_TARGET_LOONGARCH64_
|
|
671
671
|
#endif // NK_DOT_LOONGSONASX_H
|
|
@@ -86,7 +86,7 @@
|
|
|
86
86
|
#ifndef NK_DOT_NEON_H
|
|
87
87
|
#define NK_DOT_NEON_H
|
|
88
88
|
|
|
89
|
-
#if
|
|
89
|
+
#if NK_TARGET_ARM64_
|
|
90
90
|
#if NK_TARGET_NEON
|
|
91
91
|
|
|
92
92
|
#include "numkong/cast/neon.h" // `nk_e4m3x8_to_f16x8_neon_`
|
|
@@ -865,5 +865,5 @@ NK_PUBLIC void nk_vdot_f16c_neon(nk_f16c_t const *a_pairs, nk_f16c_t const *b_pa
|
|
|
865
865
|
#endif
|
|
866
866
|
|
|
867
867
|
#endif // NK_TARGET_NEON
|
|
868
|
-
#endif //
|
|
868
|
+
#endif // NK_TARGET_ARM64_
|
|
869
869
|
#endif // NK_DOT_NEON_H
|
|
@@ -57,7 +57,7 @@
|
|
|
57
57
|
#ifndef NK_DOT_NEONBFDOT_H
|
|
58
58
|
#define NK_DOT_NEONBFDOT_H
|
|
59
59
|
|
|
60
|
-
#if
|
|
60
|
+
#if NK_TARGET_ARM64_
|
|
61
61
|
#if NK_TARGET_NEONBFDOT
|
|
62
62
|
|
|
63
63
|
#include "numkong/types.h"
|
|
@@ -239,5 +239,5 @@ NK_INTERNAL void nk_dot_bf16x8_finalize_neonbfdot(
|
|
|
239
239
|
#endif
|
|
240
240
|
|
|
241
241
|
#endif // NK_TARGET_NEONBFDOT
|
|
242
|
-
#endif //
|
|
242
|
+
#endif // NK_TARGET_ARM64_
|
|
243
243
|
#endif // NK_DOT_NEONBFDOT_H
|
|
@@ -59,7 +59,7 @@
|
|
|
59
59
|
#ifndef NK_DOT_NEONFHM_H
|
|
60
60
|
#define NK_DOT_NEONFHM_H
|
|
61
61
|
|
|
62
|
-
#if
|
|
62
|
+
#if NK_TARGET_ARM64_
|
|
63
63
|
#if NK_TARGET_NEONFHM
|
|
64
64
|
|
|
65
65
|
#include "numkong/types.h"
|
|
@@ -354,5 +354,5 @@ NK_INTERNAL void nk_dot_e5m2x16_finalize_neonfhm(
|
|
|
354
354
|
#endif
|
|
355
355
|
|
|
356
356
|
#endif // NK_TARGET_NEONFHM
|
|
357
|
-
#endif //
|
|
357
|
+
#endif // NK_TARGET_ARM64_
|
|
358
358
|
#endif // NK_DOT_NEONFHM_H
|
|
@@ -30,7 +30,7 @@
|
|
|
30
30
|
#ifndef NK_DOT_NEONFP8_H
|
|
31
31
|
#define NK_DOT_NEONFP8_H
|
|
32
32
|
|
|
33
|
-
#if
|
|
33
|
+
#if NK_TARGET_ARM64_
|
|
34
34
|
#if NK_TARGET_NEONFP8
|
|
35
35
|
|
|
36
36
|
#include "numkong/types.h"
|
|
@@ -319,5 +319,5 @@ NK_INTERNAL void nk_dot_e3m2x16_finalize_neonfp8(
|
|
|
319
319
|
#endif
|
|
320
320
|
|
|
321
321
|
#endif // NK_TARGET_NEONFP8
|
|
322
|
-
#endif //
|
|
322
|
+
#endif // NK_TARGET_ARM64_
|
|
323
323
|
#endif // NK_DOT_NEONFP8_H
|
|
@@ -96,7 +96,7 @@
|
|
|
96
96
|
#ifndef NK_DOT_NEONSDOT_H
|
|
97
97
|
#define NK_DOT_NEONSDOT_H
|
|
98
98
|
|
|
99
|
-
#if
|
|
99
|
+
#if NK_TARGET_ARM64_
|
|
100
100
|
#if NK_TARGET_NEONSDOT
|
|
101
101
|
|
|
102
102
|
#include "numkong/types.h"
|
|
@@ -618,5 +618,5 @@ NK_INTERNAL void nk_dot_e3m2x16_finalize_neonsdot(
|
|
|
618
618
|
#endif
|
|
619
619
|
|
|
620
620
|
#endif // NK_TARGET_NEONSDOT
|
|
621
|
-
#endif //
|
|
621
|
+
#endif // NK_TARGET_ARM64_
|
|
622
622
|
#endif // NK_DOT_NEONSDOT_H
|
|
@@ -22,7 +22,7 @@
|
|
|
22
22
|
#ifndef NK_DOT_RVV_H
|
|
23
23
|
#define NK_DOT_RVV_H
|
|
24
24
|
|
|
25
|
-
#if
|
|
25
|
+
#if NK_TARGET_RISCV64_
|
|
26
26
|
#if NK_TARGET_RVV
|
|
27
27
|
|
|
28
28
|
#include "numkong/types.h"
|
|
@@ -718,5 +718,5 @@ NK_PUBLIC void nk_vdot_f64c_rvv(nk_f64c_t const *a_pairs, nk_f64c_t const *b_pai
|
|
|
718
718
|
#endif
|
|
719
719
|
|
|
720
720
|
#endif // NK_TARGET_RVV
|
|
721
|
-
#endif //
|
|
721
|
+
#endif // NK_TARGET_RISCV64_
|
|
722
722
|
#endif // NK_DOT_RVV_H
|
|
@@ -15,7 +15,7 @@
|
|
|
15
15
|
#ifndef NK_DOT_RVVBB_H
|
|
16
16
|
#define NK_DOT_RVVBB_H
|
|
17
17
|
|
|
18
|
-
#if
|
|
18
|
+
#if NK_TARGET_RISCV64_
|
|
19
19
|
#if NK_TARGET_RVVBB
|
|
20
20
|
|
|
21
21
|
#include "numkong/types.h"
|
|
@@ -68,5 +68,5 @@ NK_PUBLIC void nk_dot_u1_rvvbb(nk_u1x8_t const *a, nk_u1x8_t const *b, nk_size_t
|
|
|
68
68
|
#endif
|
|
69
69
|
|
|
70
70
|
#endif // NK_TARGET_RVVBB
|
|
71
|
-
#endif //
|
|
71
|
+
#endif // NK_TARGET_RISCV64_
|
|
72
72
|
#endif // NK_DOT_RVVBB_H
|
|
@@ -18,7 +18,7 @@
|
|
|
18
18
|
#ifndef NK_DOT_RVVBF16_H
|
|
19
19
|
#define NK_DOT_RVVBF16_H
|
|
20
20
|
|
|
21
|
-
#if
|
|
21
|
+
#if NK_TARGET_RISCV64_
|
|
22
22
|
#if NK_TARGET_RVVBF16
|
|
23
23
|
|
|
24
24
|
#include "numkong/types.h"
|
|
@@ -119,5 +119,5 @@ NK_PUBLIC void nk_dot_e5m2_rvvbf16(nk_e5m2_t const *a_scalars, nk_e5m2_t const *
|
|
|
119
119
|
#endif
|
|
120
120
|
|
|
121
121
|
#endif // NK_TARGET_RVVBF16
|
|
122
|
-
#endif //
|
|
122
|
+
#endif // NK_TARGET_RISCV64_
|
|
123
123
|
#endif // NK_DOT_RVVBF16_H
|
|
@@ -19,7 +19,7 @@
|
|
|
19
19
|
#ifndef NK_DOT_RVVHALF_H
|
|
20
20
|
#define NK_DOT_RVVHALF_H
|
|
21
21
|
|
|
22
|
-
#if
|
|
22
|
+
#if NK_TARGET_RISCV64_
|
|
23
23
|
#if NK_TARGET_RVVHALF
|
|
24
24
|
|
|
25
25
|
#include "numkong/types.h"
|
|
@@ -125,5 +125,5 @@ NK_PUBLIC void nk_dot_e5m2_rvvhalf(nk_e5m2_t const *a_scalars, nk_e5m2_t const *
|
|
|
125
125
|
#endif
|
|
126
126
|
|
|
127
127
|
#endif // NK_TARGET_RVVHALF
|
|
128
|
-
#endif //
|
|
128
|
+
#endif // NK_TARGET_RISCV64_
|
|
129
129
|
#endif // NK_DOT_RVVHALF_H
|
|
@@ -32,7 +32,7 @@
|
|
|
32
32
|
#ifndef NK_DOT_SAPPHIRE_H
|
|
33
33
|
#define NK_DOT_SAPPHIRE_H
|
|
34
34
|
|
|
35
|
-
#if
|
|
35
|
+
#if NK_TARGET_X8664_
|
|
36
36
|
#if NK_TARGET_SAPPHIRE
|
|
37
37
|
|
|
38
38
|
#include "numkong/types.h"
|
|
@@ -137,5 +137,5 @@ NK_INTERNAL __m512 nk_flush_f16_to_f32_sapphire_(__m512h acc_f16x32, __m512 sum_
|
|
|
137
137
|
#endif
|
|
138
138
|
|
|
139
139
|
#endif // NK_TARGET_SAPPHIRE
|
|
140
|
-
#endif //
|
|
140
|
+
#endif // NK_TARGET_X8664_
|
|
141
141
|
#endif // NK_DOT_SAPPHIRE_H
|
|
@@ -75,7 +75,7 @@
|
|
|
75
75
|
#ifndef NK_DOT_SIERRA_H
|
|
76
76
|
#define NK_DOT_SIERRA_H
|
|
77
77
|
|
|
78
|
-
#if
|
|
78
|
+
#if NK_TARGET_X8664_
|
|
79
79
|
#if NK_TARGET_SIERRA
|
|
80
80
|
|
|
81
81
|
#include "numkong/types.h"
|
|
@@ -401,5 +401,5 @@ NK_INTERNAL void nk_dot_e2m3x32_finalize_sierra(
|
|
|
401
401
|
#endif
|
|
402
402
|
|
|
403
403
|
#endif // NK_TARGET_SIERRA
|
|
404
|
-
#endif //
|
|
404
|
+
#endif // NK_TARGET_X8664_
|
|
405
405
|
#endif // NK_DOT_SIERRA_H
|
|
@@ -78,7 +78,7 @@
|
|
|
78
78
|
#ifndef NK_DOT_SKYLAKE_H
|
|
79
79
|
#define NK_DOT_SKYLAKE_H
|
|
80
80
|
|
|
81
|
-
#if
|
|
81
|
+
#if NK_TARGET_X8664_
|
|
82
82
|
#if NK_TARGET_SKYLAKE
|
|
83
83
|
|
|
84
84
|
#include "numkong/cast/skylake.h" // `nk_bf16x16_to_f32x16_skylake_`
|
|
@@ -1112,5 +1112,5 @@ NK_INTERNAL void nk_dot_e3m2x64_finalize_skylake(
|
|
|
1112
1112
|
#endif
|
|
1113
1113
|
|
|
1114
1114
|
#endif // NK_TARGET_SKYLAKE
|
|
1115
|
-
#endif //
|
|
1115
|
+
#endif // NK_TARGET_X8664_
|
|
1116
1116
|
#endif // NK_DOT_SKYLAKE_H
|
|
@@ -35,7 +35,7 @@
|
|
|
35
35
|
#ifndef NK_DOT_SVE_H
|
|
36
36
|
#define NK_DOT_SVE_H
|
|
37
37
|
|
|
38
|
-
#if
|
|
38
|
+
#if NK_TARGET_ARM64_
|
|
39
39
|
#if NK_TARGET_SVE
|
|
40
40
|
|
|
41
41
|
#include "numkong/types.h" // `nk_f32_t`
|
|
@@ -415,5 +415,5 @@ NK_PUBLIC void nk_vdot_f64c_sve(nk_f64c_t const *a_pairs, nk_f64c_t const *b_pai
|
|
|
415
415
|
#endif
|
|
416
416
|
|
|
417
417
|
#endif // NK_TARGET_SVE
|
|
418
|
-
#endif //
|
|
418
|
+
#endif // NK_TARGET_ARM64_
|
|
419
419
|
#endif // NK_DOT_SVE_H
|
|
@@ -27,7 +27,7 @@
|
|
|
27
27
|
#ifndef NK_DOT_SVEBFDOT_H
|
|
28
28
|
#define NK_DOT_SVEBFDOT_H
|
|
29
29
|
|
|
30
|
-
#if
|
|
30
|
+
#if NK_TARGET_ARM64_
|
|
31
31
|
#if NK_TARGET_SVEBFDOT
|
|
32
32
|
|
|
33
33
|
#include "numkong/types.h"
|
|
@@ -70,5 +70,5 @@ NK_PUBLIC void nk_dot_bf16_svebfdot(nk_bf16_t const *a_scalars, nk_bf16_t const
|
|
|
70
70
|
#endif
|
|
71
71
|
|
|
72
72
|
#endif // NK_TARGET_SVEBFDOT
|
|
73
|
-
#endif //
|
|
73
|
+
#endif // NK_TARGET_ARM64_
|
|
74
74
|
#endif // NK_DOT_SVEBFDOT_H
|
|
@@ -29,7 +29,7 @@
|
|
|
29
29
|
#ifndef NK_DOT_SVEHALF_H
|
|
30
30
|
#define NK_DOT_SVEHALF_H
|
|
31
31
|
|
|
32
|
-
#if
|
|
32
|
+
#if NK_TARGET_ARM64_
|
|
33
33
|
#if NK_TARGET_SVEHALF
|
|
34
34
|
|
|
35
35
|
#include "numkong/types.h" // `nk_f16_t`
|
|
@@ -163,5 +163,5 @@ NK_PUBLIC void nk_vdot_f16c_svehalf(nk_f16c_t const *a_pairs, nk_f16c_t const *b
|
|
|
163
163
|
#endif
|
|
164
164
|
|
|
165
165
|
#endif // NK_TARGET_SVEHALF
|
|
166
|
-
#endif //
|
|
166
|
+
#endif // NK_TARGET_ARM64_
|
|
167
167
|
#endif // NK_DOT_SVEHALF_H
|
|
@@ -30,7 +30,7 @@
|
|
|
30
30
|
#ifndef NK_DOT_SVESDOT_H
|
|
31
31
|
#define NK_DOT_SVESDOT_H
|
|
32
32
|
|
|
33
|
-
#if
|
|
33
|
+
#if NK_TARGET_ARM64_
|
|
34
34
|
#if NK_TARGET_SVESDOT
|
|
35
35
|
|
|
36
36
|
#include "numkong/types.h"
|
|
@@ -85,5 +85,5 @@ NK_PUBLIC void nk_dot_u8_svesdot(nk_u8_t const *a_scalars, nk_u8_t const *b_scal
|
|
|
85
85
|
#endif
|
|
86
86
|
|
|
87
87
|
#endif // NK_TARGET_SVESDOT
|
|
88
|
-
#endif //
|
|
88
|
+
#endif // NK_TARGET_ARM64_
|
|
89
89
|
#endif // NK_DOT_SVESDOT_H
|
|
@@ -12,7 +12,7 @@
|
|
|
12
12
|
#ifndef NK_DOTS_ALDER_H
|
|
13
13
|
#define NK_DOTS_ALDER_H
|
|
14
14
|
|
|
15
|
-
#if
|
|
15
|
+
#if NK_TARGET_X8664_
|
|
16
16
|
#if NK_TARGET_ALDER
|
|
17
17
|
|
|
18
18
|
#include "numkong/dot/alder.h" // Alder-specific dot product helpers
|
|
@@ -113,5 +113,5 @@ nk_define_cross_packed_(dots, e2m3, alder, e2m3, e2m3, f32, nk_b256_vec_t, nk_do
|
|
|
113
113
|
#endif
|
|
114
114
|
|
|
115
115
|
#endif // NK_TARGET_ALDER
|
|
116
|
-
#endif //
|
|
116
|
+
#endif // NK_TARGET_X8664_
|
|
117
117
|
#endif // NK_DOTS_ALDER_H
|
|
@@ -12,7 +12,7 @@
|
|
|
12
12
|
#ifndef NK_DOTS_DIAMOND_H
|
|
13
13
|
#define NK_DOTS_DIAMOND_H
|
|
14
14
|
|
|
15
|
-
#if
|
|
15
|
+
#if NK_TARGET_X8664_
|
|
16
16
|
#if NK_TARGET_DIAMOND
|
|
17
17
|
|
|
18
18
|
#include "numkong/dot/diamond.h"
|
|
@@ -82,5 +82,5 @@ nk_define_cross_packed_(dots, e5m2, diamond, e5m2, e5m2, f32, nk_b512_vec_t, nk_
|
|
|
82
82
|
#endif
|
|
83
83
|
|
|
84
84
|
#endif // NK_TARGET_DIAMOND
|
|
85
|
-
#endif //
|
|
85
|
+
#endif // NK_TARGET_X8664_
|
|
86
86
|
#endif // NK_DOTS_DIAMOND_H
|
|
@@ -9,7 +9,7 @@
|
|
|
9
9
|
#ifndef NK_DOTS_GENOA_H
|
|
10
10
|
#define NK_DOTS_GENOA_H
|
|
11
11
|
|
|
12
|
-
#if
|
|
12
|
+
#if NK_TARGET_X8664_
|
|
13
13
|
#if NK_TARGET_GENOA
|
|
14
14
|
|
|
15
15
|
#include "numkong/dot/genoa.h"
|
|
@@ -96,5 +96,5 @@ nk_define_cross_packed_(dots, e5m2, genoa, e5m2, bf16, f32, nk_b512_vec_t, nk_do
|
|
|
96
96
|
#endif
|
|
97
97
|
|
|
98
98
|
#endif // NK_TARGET_GENOA
|
|
99
|
-
#endif //
|
|
99
|
+
#endif // NK_TARGET_X8664_
|
|
100
100
|
#endif // NK_DOTS_GENOA_H
|
|
@@ -23,7 +23,7 @@
|
|
|
23
23
|
#ifndef NK_DOTS_HASWELL_H
|
|
24
24
|
#define NK_DOTS_HASWELL_H
|
|
25
25
|
|
|
26
|
-
#if
|
|
26
|
+
#if NK_TARGET_X8664_
|
|
27
27
|
#if NK_TARGET_HASWELL
|
|
28
28
|
|
|
29
29
|
#include "numkong/dot/haswell.h"
|
|
@@ -306,5 +306,5 @@ nk_define_cross_packed_(dots, u1, haswell, u1x8, u1x8, u32, nk_b128_vec_t, nk_do
|
|
|
306
306
|
#endif
|
|
307
307
|
|
|
308
308
|
#endif // NK_TARGET_HASWELL
|
|
309
|
-
#endif //
|
|
309
|
+
#endif // NK_TARGET_X8664_
|
|
310
310
|
#endif // NK_DOTS_HASWELL_H
|
|
@@ -21,7 +21,7 @@
|
|
|
21
21
|
#ifndef NK_DOTS_ICELAKE_H
|
|
22
22
|
#define NK_DOTS_ICELAKE_H
|
|
23
23
|
|
|
24
|
-
#if
|
|
24
|
+
#if NK_TARGET_X8664_
|
|
25
25
|
#if NK_TARGET_ICELAKE
|
|
26
26
|
|
|
27
27
|
#include "numkong/dot/icelake.h"
|
|
@@ -176,5 +176,5 @@ nk_define_cross_packed_(dots, u1, icelake, u1x8, u1x8, u32, nk_b512_vec_t, nk_do
|
|
|
176
176
|
#endif
|
|
177
177
|
|
|
178
178
|
#endif // NK_TARGET_ICELAKE
|
|
179
|
-
#endif //
|
|
179
|
+
#endif // NK_TARGET_X8664_
|
|
180
180
|
#endif // NK_DOTS_ICELAKE_H
|
|
@@ -13,7 +13,7 @@
|
|
|
13
13
|
#ifndef NK_DOTS_LOONGSONASX_H
|
|
14
14
|
#define NK_DOTS_LOONGSONASX_H
|
|
15
15
|
|
|
16
|
-
#if
|
|
16
|
+
#if NK_TARGET_LOONGARCH64_
|
|
17
17
|
#if NK_TARGET_LOONGSONASX
|
|
18
18
|
|
|
19
19
|
#include "numkong/dot/loongsonasx.h"
|
|
@@ -172,5 +172,5 @@ nk_define_cross_packed_(dots, f16, loongsonasx, f16, f32, f32, nk_b256_vec_t, nk
|
|
|
172
172
|
#endif
|
|
173
173
|
|
|
174
174
|
#endif // NK_TARGET_LOONGSONASX
|
|
175
|
-
#endif //
|
|
175
|
+
#endif // NK_TARGET_LOONGARCH64_
|
|
176
176
|
#endif // NK_DOTS_LOONGSONASX_H
|
|
@@ -9,7 +9,7 @@
|
|
|
9
9
|
#ifndef NK_DOTS_NEON_H
|
|
10
10
|
#define NK_DOTS_NEON_H
|
|
11
11
|
|
|
12
|
-
#if
|
|
12
|
+
#if NK_TARGET_ARM64_
|
|
13
13
|
#if NK_TARGET_NEON
|
|
14
14
|
|
|
15
15
|
#include "numkong/dot/neon.h"
|
|
@@ -119,5 +119,5 @@ nk_define_cross_packed_(dots, f64, neon, f64, f64, f64, nk_b128_vec_t, nk_dot_f6
|
|
|
119
119
|
#endif
|
|
120
120
|
|
|
121
121
|
#endif // NK_TARGET_NEON
|
|
122
|
-
#endif //
|
|
122
|
+
#endif // NK_TARGET_ARM64_
|
|
123
123
|
#endif // NK_DOTS_NEON_H
|