numkong 7.4.2 → 7.4.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +100 -100
- package/binding.gyp +3 -0
- package/c/numkong.c +1 -1
- package/include/numkong/attention/sapphireamx.h +2 -2
- package/include/numkong/attention/sme.h +2 -2
- package/include/numkong/capabilities.h +47 -47
- package/include/numkong/cast/diamond.h +2 -2
- package/include/numkong/cast/haswell.h +2 -2
- package/include/numkong/cast/icelake.h +2 -2
- package/include/numkong/cast/loongsonasx.h +2 -2
- package/include/numkong/cast/neon.h +2 -2
- package/include/numkong/cast/powervsx.h +2 -2
- package/include/numkong/cast/rvv.h +2 -2
- package/include/numkong/cast/sapphire.h +2 -2
- package/include/numkong/cast/skylake.h +2 -2
- package/include/numkong/curved/genoa.h +2 -2
- package/include/numkong/curved/haswell.h +2 -2
- package/include/numkong/curved/neon.h +2 -2
- package/include/numkong/curved/neonbfdot.h +2 -2
- package/include/numkong/curved/rvv.h +2 -2
- package/include/numkong/curved/skylake.h +2 -2
- package/include/numkong/curved/smef64.h +2 -2
- package/include/numkong/dot/alder.h +2 -2
- package/include/numkong/dot/diamond.h +2 -2
- package/include/numkong/dot/genoa.h +2 -2
- package/include/numkong/dot/haswell.h +2 -2
- package/include/numkong/dot/icelake.h +2 -2
- package/include/numkong/dot/loongsonasx.h +2 -2
- package/include/numkong/dot/neon.h +2 -2
- package/include/numkong/dot/neonbfdot.h +2 -2
- package/include/numkong/dot/neonfhm.h +2 -2
- package/include/numkong/dot/neonfp8.h +2 -2
- package/include/numkong/dot/neonsdot.h +2 -2
- package/include/numkong/dot/rvv.h +2 -2
- package/include/numkong/dot/rvvbb.h +2 -2
- package/include/numkong/dot/rvvbf16.h +2 -2
- package/include/numkong/dot/rvvhalf.h +2 -2
- package/include/numkong/dot/sapphire.h +2 -2
- package/include/numkong/dot/sierra.h +2 -2
- package/include/numkong/dot/skylake.h +2 -2
- package/include/numkong/dot/sve.h +2 -2
- package/include/numkong/dot/svebfdot.h +2 -2
- package/include/numkong/dot/svehalf.h +2 -2
- package/include/numkong/dot/svesdot.h +2 -2
- package/include/numkong/dots/alder.h +2 -2
- package/include/numkong/dots/diamond.h +2 -2
- package/include/numkong/dots/genoa.h +2 -2
- package/include/numkong/dots/haswell.h +2 -2
- package/include/numkong/dots/icelake.h +2 -2
- package/include/numkong/dots/loongsonasx.h +2 -2
- package/include/numkong/dots/neon.h +2 -2
- package/include/numkong/dots/neonbfdot.h +2 -2
- package/include/numkong/dots/neonfhm.h +2 -2
- package/include/numkong/dots/neonfp8.h +2 -2
- package/include/numkong/dots/neonsdot.h +2 -2
- package/include/numkong/dots/powervsx.h +2 -2
- package/include/numkong/dots/rvv.h +2 -2
- package/include/numkong/dots/sapphireamx.h +2 -2
- package/include/numkong/dots/sierra.h +2 -2
- package/include/numkong/dots/skylake.h +2 -2
- package/include/numkong/dots/sme.h +10 -10
- package/include/numkong/dots/smebi32.h +2 -2
- package/include/numkong/dots/smef64.h +2 -2
- package/include/numkong/dots/smehalf.h +2 -2
- package/include/numkong/each/haswell.h +6 -6
- package/include/numkong/each/icelake.h +2 -2
- package/include/numkong/each/neon.h +2 -2
- package/include/numkong/each/neonbfdot.h +2 -2
- package/include/numkong/each/neonhalf.h +2 -2
- package/include/numkong/each/rvv.h +2 -2
- package/include/numkong/each/sapphire.h +2 -2
- package/include/numkong/each/skylake.h +2 -2
- package/include/numkong/geospatial/haswell.h +2 -2
- package/include/numkong/geospatial/neon.h +2 -2
- package/include/numkong/geospatial/rvv.h +2 -2
- package/include/numkong/geospatial/skylake.h +2 -2
- package/include/numkong/maxsim/alder.h +2 -2
- package/include/numkong/maxsim/genoa.h +2 -2
- package/include/numkong/maxsim/haswell.h +2 -2
- package/include/numkong/maxsim/icelake.h +2 -2
- package/include/numkong/maxsim/neonsdot.h +2 -2
- package/include/numkong/maxsim/sapphireamx.h +2 -2
- package/include/numkong/maxsim/sme.h +2 -2
- package/include/numkong/mesh/haswell.h +2 -2
- package/include/numkong/mesh/neon.h +2 -2
- package/include/numkong/mesh/neonbfdot.h +2 -2
- package/include/numkong/mesh/rvv.h +2 -2
- package/include/numkong/mesh/skylake.h +2 -2
- package/include/numkong/numkong.h +1 -1
- package/include/numkong/probability/haswell.h +2 -2
- package/include/numkong/probability/neon.h +2 -2
- package/include/numkong/probability/rvv.h +2 -2
- package/include/numkong/probability/skylake.h +2 -2
- package/include/numkong/reduce/alder.h +2 -2
- package/include/numkong/reduce/genoa.h +2 -2
- package/include/numkong/reduce/haswell.h +2 -2
- package/include/numkong/reduce/icelake.h +2 -2
- package/include/numkong/reduce/neon.h +2 -2
- package/include/numkong/reduce/neonbfdot.h +2 -2
- package/include/numkong/reduce/neonfhm.h +2 -2
- package/include/numkong/reduce/neonsdot.h +2 -2
- package/include/numkong/reduce/rvv.h +2 -2
- package/include/numkong/reduce/sierra.h +2 -2
- package/include/numkong/reduce/skylake.h +2 -2
- package/include/numkong/scalar/haswell.h +2 -2
- package/include/numkong/scalar/loongsonasx.h +2 -2
- package/include/numkong/scalar/neon.h +2 -2
- package/include/numkong/scalar/neonhalf.h +2 -2
- package/include/numkong/scalar/powervsx.h +2 -2
- package/include/numkong/scalar/rvv.h +2 -2
- package/include/numkong/scalar/sapphire.h +2 -2
- package/include/numkong/set/haswell.h +2 -2
- package/include/numkong/set/icelake.h +2 -2
- package/include/numkong/set/loongsonasx.h +2 -2
- package/include/numkong/set/neon.h +2 -2
- package/include/numkong/set/powervsx.h +2 -2
- package/include/numkong/set/rvv.h +2 -2
- package/include/numkong/set/rvvbb.h +2 -2
- package/include/numkong/set/sve.h +2 -2
- package/include/numkong/sets/haswell.h +2 -2
- package/include/numkong/sets/icelake.h +2 -2
- package/include/numkong/sets/loongsonasx.h +2 -2
- package/include/numkong/sets/neon.h +2 -2
- package/include/numkong/sets/powervsx.h +2 -2
- package/include/numkong/sets/smebi32.h +2 -2
- package/include/numkong/sparse/icelake.h +2 -2
- package/include/numkong/sparse/neon.h +2 -2
- package/include/numkong/sparse/sve2.h +2 -2
- package/include/numkong/sparse/turin.h +2 -2
- package/include/numkong/spatial/alder.h +2 -2
- package/include/numkong/spatial/diamond.h +2 -2
- package/include/numkong/spatial/genoa.h +2 -2
- package/include/numkong/spatial/haswell.h +2 -2
- package/include/numkong/spatial/icelake.h +2 -2
- package/include/numkong/spatial/loongsonasx.h +2 -2
- package/include/numkong/spatial/neon.h +2 -2
- package/include/numkong/spatial/neonbfdot.h +2 -2
- package/include/numkong/spatial/neonfp8.h +2 -2
- package/include/numkong/spatial/neonsdot.h +2 -2
- package/include/numkong/spatial/powervsx.h +2 -2
- package/include/numkong/spatial/rvv.h +2 -2
- package/include/numkong/spatial/rvvbf16.h +2 -2
- package/include/numkong/spatial/rvvhalf.h +2 -2
- package/include/numkong/spatial/sierra.h +2 -2
- package/include/numkong/spatial/skylake.h +2 -2
- package/include/numkong/spatial/sve.h +2 -2
- package/include/numkong/spatial/svebfdot.h +2 -2
- package/include/numkong/spatial/svehalf.h +2 -2
- package/include/numkong/spatial/svesdot.h +2 -2
- package/include/numkong/spatials/alder.h +2 -2
- package/include/numkong/spatials/diamond.h +2 -2
- package/include/numkong/spatials/genoa.h +2 -2
- package/include/numkong/spatials/haswell.h +2 -2
- package/include/numkong/spatials/icelake.h +2 -2
- package/include/numkong/spatials/loongsonasx.h +2 -2
- package/include/numkong/spatials/neon.h +2 -2
- package/include/numkong/spatials/neonbfdot.h +2 -2
- package/include/numkong/spatials/neonfhm.h +2 -2
- package/include/numkong/spatials/neonfp8.h +2 -2
- package/include/numkong/spatials/neonsdot.h +2 -2
- package/include/numkong/spatials/powervsx.h +2 -2
- package/include/numkong/spatials/rvv.h +2 -2
- package/include/numkong/spatials/sapphireamx.h +2 -2
- package/include/numkong/spatials/sierra.h +2 -2
- package/include/numkong/spatials/skylake.h +2 -2
- package/include/numkong/spatials/sme.h +2 -2
- package/include/numkong/spatials/smef64.h +2 -2
- package/include/numkong/trigonometry/haswell.h +2 -2
- package/include/numkong/trigonometry/neon.h +2 -2
- package/include/numkong/trigonometry/rvv.h +2 -2
- package/include/numkong/trigonometry/skylake.h +2 -2
- package/include/numkong/types.h +103 -89
- package/numkong.gypi +3 -0
- package/package.json +7 -7
|
@@ -22,7 +22,7 @@
|
|
|
22
22
|
#ifndef NK_MESH_HASWELL_H
|
|
23
23
|
#define NK_MESH_HASWELL_H
|
|
24
24
|
|
|
25
|
-
#if
|
|
25
|
+
#if NK_TARGET_X8664_
|
|
26
26
|
#if NK_TARGET_HASWELL
|
|
27
27
|
|
|
28
28
|
#include "numkong/types.h"
|
|
@@ -2106,5 +2106,5 @@ NK_PUBLIC void nk_umeyama_bf16_haswell(nk_bf16_t const *a, nk_bf16_t const *b, n
|
|
|
2106
2106
|
#endif
|
|
2107
2107
|
|
|
2108
2108
|
#endif // NK_TARGET_HASWELL
|
|
2109
|
-
#endif //
|
|
2109
|
+
#endif // NK_TARGET_X8664_
|
|
2110
2110
|
#endif // NK_MESH_HASWELL_H
|
|
@@ -27,7 +27,7 @@
|
|
|
27
27
|
#ifndef NK_MESH_NEON_H
|
|
28
28
|
#define NK_MESH_NEON_H
|
|
29
29
|
|
|
30
|
-
#if
|
|
30
|
+
#if NK_TARGET_ARM64_
|
|
31
31
|
#if NK_TARGET_NEON
|
|
32
32
|
|
|
33
33
|
#include "numkong/types.h"
|
|
@@ -1906,5 +1906,5 @@ NK_PUBLIC void nk_umeyama_f16_neon(nk_f16_t const *a, nk_f16_t const *b, nk_size
|
|
|
1906
1906
|
#endif
|
|
1907
1907
|
|
|
1908
1908
|
#endif // NK_TARGET_NEON
|
|
1909
|
-
#endif //
|
|
1909
|
+
#endif // NK_TARGET_ARM64_
|
|
1910
1910
|
#endif // NK_MESH_NEON_H
|
|
@@ -30,7 +30,7 @@
|
|
|
30
30
|
#ifndef NK_MESH_NEONBFDOT_H
|
|
31
31
|
#define NK_MESH_NEONBFDOT_H
|
|
32
32
|
|
|
33
|
-
#if
|
|
33
|
+
#if NK_TARGET_ARM64_
|
|
34
34
|
#if NK_TARGET_NEONBFDOT
|
|
35
35
|
|
|
36
36
|
#include "numkong/types.h"
|
|
@@ -806,5 +806,5 @@ NK_PUBLIC void nk_umeyama_bf16_neonbfdot(nk_bf16_t const *a, nk_bf16_t const *b,
|
|
|
806
806
|
#endif
|
|
807
807
|
|
|
808
808
|
#endif // NK_TARGET_NEONBFDOT
|
|
809
|
-
#endif //
|
|
809
|
+
#endif // NK_TARGET_ARM64_
|
|
810
810
|
#endif // NK_MESH_NEONBFDOT_H
|
|
@@ -38,7 +38,7 @@
|
|
|
38
38
|
#ifndef NK_MESH_RVV_H
|
|
39
39
|
#define NK_MESH_RVV_H
|
|
40
40
|
|
|
41
|
-
#if
|
|
41
|
+
#if NK_TARGET_RISCV64_
|
|
42
42
|
#if NK_TARGET_RVV
|
|
43
43
|
|
|
44
44
|
#include "numkong/types.h"
|
|
@@ -1007,5 +1007,5 @@ NK_PUBLIC void nk_umeyama_bf16_rvv(nk_bf16_t const *a, nk_bf16_t const *b, nk_si
|
|
|
1007
1007
|
#endif
|
|
1008
1008
|
|
|
1009
1009
|
#endif // NK_TARGET_RVV
|
|
1010
|
-
#endif //
|
|
1010
|
+
#endif // NK_TARGET_RISCV64_
|
|
1011
1011
|
#endif // NK_MESH_RVV_H
|
|
@@ -21,7 +21,7 @@
|
|
|
21
21
|
#ifndef NK_MESH_SKYLAKE_H
|
|
22
22
|
#define NK_MESH_SKYLAKE_H
|
|
23
23
|
|
|
24
|
-
#if
|
|
24
|
+
#if NK_TARGET_X8664_
|
|
25
25
|
#if NK_TARGET_SKYLAKE
|
|
26
26
|
|
|
27
27
|
#include "numkong/types.h"
|
|
@@ -2456,5 +2456,5 @@ NK_PUBLIC void nk_umeyama_bf16_skylake(nk_bf16_t const *a, nk_bf16_t const *b, n
|
|
|
2456
2456
|
#endif
|
|
2457
2457
|
|
|
2458
2458
|
#endif // NK_TARGET_SKYLAKE
|
|
2459
|
-
#endif //
|
|
2459
|
+
#endif // NK_TARGET_X8664_
|
|
2460
2460
|
#endif // NK_MESH_SKYLAKE_H
|
|
@@ -11,7 +11,7 @@
|
|
|
11
11
|
#ifndef NK_NUMKONG_H
|
|
12
12
|
#define NK_NUMKONG_H
|
|
13
13
|
|
|
14
|
-
#include "numkong/capabilities.h" // Runtime detection, like `
|
|
14
|
+
#include "numkong/capabilities.h" // Runtime detection, like `nk_capabilities_x8664_`
|
|
15
15
|
#include "numkong/scalar.h" // Scalar math: sqrt, rsqrt, fma, saturating, order, like `nk_f32_sqrt`
|
|
16
16
|
#include "numkong/cast.h" // Type conversions, like `nk_cast`
|
|
17
17
|
#include "numkong/set.h" // Hamming, Jaccard, like `nk_hamming_u1`
|
|
@@ -9,7 +9,7 @@
|
|
|
9
9
|
#ifndef NK_PROBABILITY_HASWELL_H
|
|
10
10
|
#define NK_PROBABILITY_HASWELL_H
|
|
11
11
|
|
|
12
|
-
#if
|
|
12
|
+
#if NK_TARGET_X8664_
|
|
13
13
|
#if NK_TARGET_HASWELL
|
|
14
14
|
|
|
15
15
|
#include "numkong/types.h"
|
|
@@ -263,5 +263,5 @@ nk_jsd_f64_haswell_cycle:
|
|
|
263
263
|
#endif
|
|
264
264
|
|
|
265
265
|
#endif // NK_TARGET_HASWELL
|
|
266
|
-
#endif //
|
|
266
|
+
#endif // NK_TARGET_X8664_
|
|
267
267
|
#endif // NK_PROBABILITY_HASWELL_H
|
|
@@ -9,7 +9,7 @@
|
|
|
9
9
|
#ifndef NK_PROBABILITY_NEON_H
|
|
10
10
|
#define NK_PROBABILITY_NEON_H
|
|
11
11
|
|
|
12
|
-
#if
|
|
12
|
+
#if NK_TARGET_ARM64_
|
|
13
13
|
|
|
14
14
|
#include "numkong/types.h"
|
|
15
15
|
#include "numkong/cast/serial.h" // `nk_partial_load_b16x4_serial_`, `nk_partial_load_b32x4_serial_`
|
|
@@ -251,5 +251,5 @@ nk_jsd_f16_neon_cycle:
|
|
|
251
251
|
} // extern "C"
|
|
252
252
|
#endif
|
|
253
253
|
|
|
254
|
-
#endif //
|
|
254
|
+
#endif // NK_TARGET_ARM64_
|
|
255
255
|
#endif // NK_PROBABILITY_NEON_H
|
|
@@ -17,7 +17,7 @@
|
|
|
17
17
|
#ifndef NK_PROBABILITY_RVV_H
|
|
18
18
|
#define NK_PROBABILITY_RVV_H
|
|
19
19
|
|
|
20
|
-
#if
|
|
20
|
+
#if NK_TARGET_RISCV64_
|
|
21
21
|
#if NK_TARGET_RVV
|
|
22
22
|
|
|
23
23
|
#include "numkong/types.h"
|
|
@@ -410,5 +410,5 @@ NK_PUBLIC void nk_jsd_bf16_rvv(nk_bf16_t const *a, nk_bf16_t const *b, nk_size_t
|
|
|
410
410
|
#endif
|
|
411
411
|
|
|
412
412
|
#endif // NK_TARGET_RVV
|
|
413
|
-
#endif //
|
|
413
|
+
#endif // NK_TARGET_RISCV64_
|
|
414
414
|
#endif // NK_PROBABILITY_RVV_H
|
|
@@ -9,7 +9,7 @@
|
|
|
9
9
|
#ifndef NK_PROBABILITY_SKYLAKE_H
|
|
10
10
|
#define NK_PROBABILITY_SKYLAKE_H
|
|
11
11
|
|
|
12
|
-
#if
|
|
12
|
+
#if NK_TARGET_X8664_
|
|
13
13
|
#if NK_TARGET_SKYLAKE
|
|
14
14
|
|
|
15
15
|
#include "numkong/types.h"
|
|
@@ -320,5 +320,5 @@ nk_jsd_f16_skylake_cycle:
|
|
|
320
320
|
#endif
|
|
321
321
|
|
|
322
322
|
#endif // NK_TARGET_SKYLAKE
|
|
323
|
-
#endif //
|
|
323
|
+
#endif // NK_TARGET_X8664_
|
|
324
324
|
#endif // NK_PROBABILITY_SKYLAKE_H
|
|
@@ -14,7 +14,7 @@
|
|
|
14
14
|
#ifndef NK_REDUCE_ALDER_H
|
|
15
15
|
#define NK_REDUCE_ALDER_H
|
|
16
16
|
|
|
17
|
-
#if
|
|
17
|
+
#if NK_TARGET_X8664_
|
|
18
18
|
#if NK_TARGET_ALDER
|
|
19
19
|
|
|
20
20
|
#include "numkong/types.h"
|
|
@@ -632,5 +632,5 @@ NK_PUBLIC void nk_reduce_moments_e2m3_alder( //
|
|
|
632
632
|
#endif
|
|
633
633
|
|
|
634
634
|
#endif // NK_TARGET_ALDER
|
|
635
|
-
#endif //
|
|
635
|
+
#endif // NK_TARGET_X8664_
|
|
636
636
|
#endif // NK_REDUCE_ALDER_H
|
|
@@ -18,7 +18,7 @@
|
|
|
18
18
|
#ifndef NK_REDUCE_GENOA_H
|
|
19
19
|
#define NK_REDUCE_GENOA_H
|
|
20
20
|
|
|
21
|
-
#if
|
|
21
|
+
#if NK_TARGET_X8664_
|
|
22
22
|
#if NK_TARGET_GENOA
|
|
23
23
|
|
|
24
24
|
#include "numkong/reduce/serial.h"
|
|
@@ -197,5 +197,5 @@ NK_PUBLIC void nk_reduce_moments_e5m2_genoa( //
|
|
|
197
197
|
#endif
|
|
198
198
|
|
|
199
199
|
#endif // NK_TARGET_GENOA
|
|
200
|
-
#endif //
|
|
200
|
+
#endif // NK_TARGET_X8664_
|
|
201
201
|
#endif // NK_REDUCE_GENOA_H
|
|
@@ -27,7 +27,7 @@
|
|
|
27
27
|
#ifndef NK_REDUCE_HASWELL_H
|
|
28
28
|
#define NK_REDUCE_HASWELL_H
|
|
29
29
|
|
|
30
|
-
#if
|
|
30
|
+
#if NK_TARGET_X8664_
|
|
31
31
|
#if NK_TARGET_HASWELL
|
|
32
32
|
|
|
33
33
|
#include "numkong/types.h"
|
|
@@ -3781,5 +3781,5 @@ NK_PUBLIC void nk_reduce_moments_u1_haswell( //
|
|
|
3781
3781
|
#endif
|
|
3782
3782
|
|
|
3783
3783
|
#endif // NK_TARGET_HASWELL
|
|
3784
|
-
#endif //
|
|
3784
|
+
#endif // NK_TARGET_X8664_
|
|
3785
3785
|
#endif // NK_REDUCE_HASWELL_H
|
|
@@ -15,7 +15,7 @@
|
|
|
15
15
|
#ifndef NK_REDUCE_ICELAKE_H
|
|
16
16
|
#define NK_REDUCE_ICELAKE_H
|
|
17
17
|
|
|
18
|
-
#if
|
|
18
|
+
#if NK_TARGET_X8664_
|
|
19
19
|
#if NK_TARGET_ICELAKE
|
|
20
20
|
|
|
21
21
|
#include "numkong/reduce/serial.h"
|
|
@@ -545,5 +545,5 @@ NK_PUBLIC void nk_reduce_moments_e3m2_icelake( //
|
|
|
545
545
|
#endif
|
|
546
546
|
|
|
547
547
|
#endif // NK_TARGET_ICELAKE
|
|
548
|
-
#endif //
|
|
548
|
+
#endif // NK_TARGET_X8664_
|
|
549
549
|
#endif // NK_REDUCE_ICELAKE_H
|
|
@@ -9,7 +9,7 @@
|
|
|
9
9
|
#ifndef NK_REDUCE_NEON_H
|
|
10
10
|
#define NK_REDUCE_NEON_H
|
|
11
11
|
|
|
12
|
-
#if
|
|
12
|
+
#if NK_TARGET_ARM64_
|
|
13
13
|
#if NK_TARGET_NEON
|
|
14
14
|
|
|
15
15
|
#include "numkong/types.h" // `nk_size_t`
|
|
@@ -3947,5 +3947,5 @@ NK_PUBLIC void nk_reduce_moments_f16_neon( //
|
|
|
3947
3947
|
#endif
|
|
3948
3948
|
|
|
3949
3949
|
#endif // NK_TARGET_NEON
|
|
3950
|
-
#endif //
|
|
3950
|
+
#endif // NK_TARGET_ARM64_
|
|
3951
3951
|
#endif // NK_REDUCE_NEON_H
|
|
@@ -9,7 +9,7 @@
|
|
|
9
9
|
#ifndef NK_REDUCE_NEONBFDOT_H
|
|
10
10
|
#define NK_REDUCE_NEONBFDOT_H
|
|
11
11
|
|
|
12
|
-
#if
|
|
12
|
+
#if NK_TARGET_ARM64_
|
|
13
13
|
#if NK_TARGET_NEONBFDOT
|
|
14
14
|
|
|
15
15
|
#include "numkong/types.h" // `nk_bf16_t`
|
|
@@ -138,5 +138,5 @@ NK_PUBLIC void nk_reduce_moments_bf16_neonbfdot( //
|
|
|
138
138
|
#endif
|
|
139
139
|
|
|
140
140
|
#endif // NK_TARGET_NEONBFDOT
|
|
141
|
-
#endif //
|
|
141
|
+
#endif // NK_TARGET_ARM64_
|
|
142
142
|
#endif // NK_REDUCE_NEONBFDOT_H
|
|
@@ -9,7 +9,7 @@
|
|
|
9
9
|
#ifndef NK_REDUCE_NEONFHM_H
|
|
10
10
|
#define NK_REDUCE_NEONFHM_H
|
|
11
11
|
|
|
12
|
-
#if
|
|
12
|
+
#if NK_TARGET_ARM64_
|
|
13
13
|
#if NK_TARGET_NEONFHM
|
|
14
14
|
|
|
15
15
|
#include "numkong/types.h" // `nk_e4m3_t`
|
|
@@ -289,5 +289,5 @@ NK_PUBLIC void nk_reduce_moments_e5m2_neonfhm( //
|
|
|
289
289
|
#endif
|
|
290
290
|
|
|
291
291
|
#endif // NK_TARGET_NEONFHM
|
|
292
|
-
#endif //
|
|
292
|
+
#endif // NK_TARGET_ARM64_
|
|
293
293
|
#endif // NK_REDUCE_NEONFHM_H
|
|
@@ -9,7 +9,7 @@
|
|
|
9
9
|
#ifndef NK_REDUCE_NEONSDOT_H
|
|
10
10
|
#define NK_REDUCE_NEONSDOT_H
|
|
11
11
|
|
|
12
|
-
#if
|
|
12
|
+
#if NK_TARGET_ARM64_
|
|
13
13
|
#if NK_TARGET_NEONSDOT
|
|
14
14
|
|
|
15
15
|
#include "numkong/types.h"
|
|
@@ -353,5 +353,5 @@ NK_PUBLIC void nk_reduce_moments_e2m3_neonsdot( //
|
|
|
353
353
|
#endif
|
|
354
354
|
|
|
355
355
|
#endif // NK_TARGET_NEONSDOT
|
|
356
|
-
#endif //
|
|
356
|
+
#endif // NK_TARGET_ARM64_
|
|
357
357
|
#endif // NK_REDUCE_NEONSDOT_H
|
|
@@ -9,7 +9,7 @@
|
|
|
9
9
|
#ifndef NK_REDUCE_RVV_H
|
|
10
10
|
#define NK_REDUCE_RVV_H
|
|
11
11
|
|
|
12
|
-
#if
|
|
12
|
+
#if NK_TARGET_RISCV64_
|
|
13
13
|
#if NK_TARGET_RVV
|
|
14
14
|
|
|
15
15
|
#include "numkong/types.h"
|
|
@@ -3529,5 +3529,5 @@ NK_PUBLIC void nk_reduce_minmax_e3m2_rvv( //
|
|
|
3529
3529
|
#endif
|
|
3530
3530
|
|
|
3531
3531
|
#endif // NK_TARGET_RVV
|
|
3532
|
-
#endif //
|
|
3532
|
+
#endif // NK_TARGET_RISCV64_
|
|
3533
3533
|
#endif // NK_REDUCE_RVV_H
|
|
@@ -13,7 +13,7 @@
|
|
|
13
13
|
#ifndef NK_REDUCE_SIERRA_H
|
|
14
14
|
#define NK_REDUCE_SIERRA_H
|
|
15
15
|
|
|
16
|
-
#if
|
|
16
|
+
#if NK_TARGET_X8664_
|
|
17
17
|
#if NK_TARGET_SIERRA
|
|
18
18
|
|
|
19
19
|
#include "numkong/types.h"
|
|
@@ -334,5 +334,5 @@ NK_PUBLIC void nk_reduce_moments_e2m3_sierra( //
|
|
|
334
334
|
#endif
|
|
335
335
|
|
|
336
336
|
#endif // NK_TARGET_SIERRA
|
|
337
|
-
#endif //
|
|
337
|
+
#endif // NK_TARGET_X8664_
|
|
338
338
|
#endif // NK_REDUCE_SIERRA_H
|
|
@@ -35,7 +35,7 @@
|
|
|
35
35
|
#ifndef NK_REDUCE_SKYLAKE_H
|
|
36
36
|
#define NK_REDUCE_SKYLAKE_H
|
|
37
37
|
|
|
38
|
-
#if
|
|
38
|
+
#if NK_TARGET_X8664_
|
|
39
39
|
#if NK_TARGET_SKYLAKE
|
|
40
40
|
|
|
41
41
|
#include "numkong/types.h"
|
|
@@ -3790,5 +3790,5 @@ NK_PUBLIC void nk_reduce_minmax_f16_skylake( //
|
|
|
3790
3790
|
#endif
|
|
3791
3791
|
|
|
3792
3792
|
#endif // NK_TARGET_SKYLAKE
|
|
3793
|
-
#endif //
|
|
3793
|
+
#endif // NK_TARGET_X8664_
|
|
3794
3794
|
#endif // NK_REDUCE_SKYLAKE_H
|
|
@@ -19,7 +19,7 @@
|
|
|
19
19
|
#ifndef NK_SCALAR_HASWELL_H
|
|
20
20
|
#define NK_SCALAR_HASWELL_H
|
|
21
21
|
|
|
22
|
-
#if
|
|
22
|
+
#if NK_TARGET_X8664_
|
|
23
23
|
#if NK_TARGET_HASWELL
|
|
24
24
|
|
|
25
25
|
#include "numkong/types.h"
|
|
@@ -118,5 +118,5 @@ NK_PUBLIC nk_i64_t nk_i64_saturating_mul_haswell(nk_i64_t a, nk_i64_t b) {
|
|
|
118
118
|
#endif
|
|
119
119
|
|
|
120
120
|
#endif // NK_TARGET_HASWELL
|
|
121
|
-
#endif //
|
|
121
|
+
#endif // NK_TARGET_X8664_
|
|
122
122
|
#endif // NK_SCALAR_HASWELL_H
|
|
@@ -14,7 +14,7 @@
|
|
|
14
14
|
#ifndef NK_SCALAR_LOONGSONASX_H
|
|
15
15
|
#define NK_SCALAR_LOONGSONASX_H
|
|
16
16
|
|
|
17
|
-
#if
|
|
17
|
+
#if NK_TARGET_LOONGARCH64_
|
|
18
18
|
#if NK_TARGET_LOONGSONASX
|
|
19
19
|
|
|
20
20
|
#include "numkong/types.h"
|
|
@@ -70,5 +70,5 @@ NK_PUBLIC nk_f64_t nk_f64_rsqrt_loongsonasx(nk_f64_t x) { return 1.0 / nk_f64_sq
|
|
|
70
70
|
#endif
|
|
71
71
|
|
|
72
72
|
#endif // NK_TARGET_LOONGSONASX
|
|
73
|
-
#endif //
|
|
73
|
+
#endif // NK_TARGET_LOONGARCH64_
|
|
74
74
|
#endif // NK_SCALAR_LOONGSONASX_H
|
|
@@ -19,7 +19,7 @@
|
|
|
19
19
|
#ifndef NK_SCALAR_NEON_H
|
|
20
20
|
#define NK_SCALAR_NEON_H
|
|
21
21
|
|
|
22
|
-
#if
|
|
22
|
+
#if NK_TARGET_ARM64_
|
|
23
23
|
#if NK_TARGET_NEON
|
|
24
24
|
|
|
25
25
|
#include "numkong/types.h"
|
|
@@ -118,5 +118,5 @@ NK_PUBLIC nk_i64_t nk_i64_saturating_mul_neon(nk_i64_t a, nk_i64_t b) {
|
|
|
118
118
|
#endif
|
|
119
119
|
|
|
120
120
|
#endif // NK_TARGET_NEON
|
|
121
|
-
#endif //
|
|
121
|
+
#endif // NK_TARGET_ARM64_
|
|
122
122
|
#endif // NK_SCALAR_NEON_H
|
|
@@ -13,7 +13,7 @@
|
|
|
13
13
|
#ifndef NK_SCALAR_NEONHALF_H
|
|
14
14
|
#define NK_SCALAR_NEONHALF_H
|
|
15
15
|
|
|
16
|
-
#if
|
|
16
|
+
#if NK_TARGET_ARM64_
|
|
17
17
|
#if NK_TARGET_NEONHALF
|
|
18
18
|
|
|
19
19
|
#include "numkong/types.h"
|
|
@@ -66,5 +66,5 @@ NK_PUBLIC nk_f16_t nk_f16_fma_neonhalf(nk_f16_t a, nk_f16_t b, nk_f16_t c) {
|
|
|
66
66
|
#endif
|
|
67
67
|
|
|
68
68
|
#endif // NK_TARGET_NEONHALF
|
|
69
|
-
#endif //
|
|
69
|
+
#endif // NK_TARGET_ARM64_
|
|
70
70
|
#endif // NK_SCALAR_NEONHALF_H
|
|
@@ -19,7 +19,7 @@
|
|
|
19
19
|
#ifndef NK_SCALAR_POWERVSX_H
|
|
20
20
|
#define NK_SCALAR_POWERVSX_H
|
|
21
21
|
|
|
22
|
-
#if
|
|
22
|
+
#if NK_TARGET_POWER64_
|
|
23
23
|
#if NK_TARGET_POWERVSX
|
|
24
24
|
|
|
25
25
|
#include "numkong/types.h"
|
|
@@ -92,5 +92,5 @@ NK_PUBLIC nk_f64_t nk_f64_fma_powervsx(nk_f64_t a, nk_f64_t b, nk_f64_t c) {
|
|
|
92
92
|
#endif
|
|
93
93
|
|
|
94
94
|
#endif // NK_TARGET_POWERVSX
|
|
95
|
-
#endif //
|
|
95
|
+
#endif // NK_TARGET_POWER64_
|
|
96
96
|
#endif // NK_SCALAR_POWERVSX_H
|
|
@@ -12,7 +12,7 @@
|
|
|
12
12
|
#ifndef NK_SCALAR_RVV_H
|
|
13
13
|
#define NK_SCALAR_RVV_H
|
|
14
14
|
|
|
15
|
-
#if
|
|
15
|
+
#if NK_TARGET_RISCV64_
|
|
16
16
|
#if NK_TARGET_RVV
|
|
17
17
|
|
|
18
18
|
#include "numkong/types.h"
|
|
@@ -207,5 +207,5 @@ NK_PUBLIC nk_i64_t nk_i64_saturating_mul_rvv(nk_i64_t a, nk_i64_t b) {
|
|
|
207
207
|
#endif
|
|
208
208
|
|
|
209
209
|
#endif // NK_TARGET_RVV
|
|
210
|
-
#endif //
|
|
210
|
+
#endif // NK_TARGET_RISCV64_
|
|
211
211
|
#endif // NK_SCALAR_RVV_H
|
|
@@ -11,7 +11,7 @@
|
|
|
11
11
|
#ifndef NK_SCALAR_SAPPHIRE_H
|
|
12
12
|
#define NK_SCALAR_SAPPHIRE_H
|
|
13
13
|
|
|
14
|
-
#if
|
|
14
|
+
#if NK_TARGET_X8664_
|
|
15
15
|
#if NK_TARGET_SAPPHIRE
|
|
16
16
|
|
|
17
17
|
#include "numkong/types.h"
|
|
@@ -70,5 +70,5 @@ NK_PUBLIC nk_f16_t nk_f16_fma_sapphire(nk_f16_t a, nk_f16_t b, nk_f16_t c) {
|
|
|
70
70
|
#endif
|
|
71
71
|
|
|
72
72
|
#endif // NK_TARGET_SAPPHIRE
|
|
73
|
-
#endif //
|
|
73
|
+
#endif // NK_TARGET_X8664_
|
|
74
74
|
#endif // NK_SCALAR_SAPPHIRE_H
|
|
@@ -38,7 +38,7 @@
|
|
|
38
38
|
#ifndef NK_SET_HASWELL_H
|
|
39
39
|
#define NK_SET_HASWELL_H
|
|
40
40
|
|
|
41
|
-
#if
|
|
41
|
+
#if NK_TARGET_X8664_
|
|
42
42
|
#if NK_TARGET_HASWELL
|
|
43
43
|
|
|
44
44
|
#include "numkong/types.h"
|
|
@@ -330,5 +330,5 @@ NK_INTERNAL void nk_jaccard_f32x4_from_dot_haswell_(nk_b128_vec_t dots, nk_u32_t
|
|
|
330
330
|
#endif
|
|
331
331
|
|
|
332
332
|
#endif // NK_TARGET_HASWELL
|
|
333
|
-
#endif //
|
|
333
|
+
#endif // NK_TARGET_X8664_
|
|
334
334
|
#endif // NK_SET_HASWELL_H
|
|
@@ -37,7 +37,7 @@
|
|
|
37
37
|
#ifndef NK_SET_ICELAKE_H
|
|
38
38
|
#define NK_SET_ICELAKE_H
|
|
39
39
|
|
|
40
|
-
#if
|
|
40
|
+
#if NK_TARGET_X8664_
|
|
41
41
|
#if NK_TARGET_ICELAKE
|
|
42
42
|
|
|
43
43
|
#include "numkong/types.h"
|
|
@@ -481,5 +481,5 @@ NK_INTERNAL void nk_jaccard_f32x4_from_dot_icelake_(nk_b128_vec_t dots, nk_u32_t
|
|
|
481
481
|
#endif
|
|
482
482
|
|
|
483
483
|
#endif // NK_TARGET_ICELAKE
|
|
484
|
-
#endif //
|
|
484
|
+
#endif // NK_TARGET_X8664_
|
|
485
485
|
#endif // NK_SET_ICELAKE_H
|
|
@@ -33,7 +33,7 @@
|
|
|
33
33
|
#ifndef NK_SET_LOONGSONASX_H
|
|
34
34
|
#define NK_SET_LOONGSONASX_H
|
|
35
35
|
|
|
36
|
-
#if
|
|
36
|
+
#if NK_TARGET_LOONGARCH64_
|
|
37
37
|
#if NK_TARGET_LOONGSONASX
|
|
38
38
|
|
|
39
39
|
#include "numkong/types.h"
|
|
@@ -177,5 +177,5 @@ NK_INTERNAL void nk_jaccard_f32x4_from_dot_loongsonasx_(nk_b128_vec_t dots, nk_u
|
|
|
177
177
|
#endif
|
|
178
178
|
|
|
179
179
|
#endif // NK_TARGET_LOONGSONASX
|
|
180
|
-
#endif //
|
|
180
|
+
#endif // NK_TARGET_LOONGARCH64_
|
|
181
181
|
#endif // NK_SET_LOONGSONASX_H
|
|
@@ -41,7 +41,7 @@
|
|
|
41
41
|
#ifndef NK_SET_NEON_H
|
|
42
42
|
#define NK_SET_NEON_H
|
|
43
43
|
|
|
44
|
-
#if
|
|
44
|
+
#if NK_TARGET_ARM64_
|
|
45
45
|
#if NK_TARGET_NEON
|
|
46
46
|
|
|
47
47
|
#include "numkong/types.h" // `nk_u1x8_t`
|
|
@@ -359,5 +359,5 @@ NK_INTERNAL void nk_jaccard_f32x4_from_dot_neon_(nk_b128_vec_t dots, nk_u32_t qu
|
|
|
359
359
|
#endif
|
|
360
360
|
|
|
361
361
|
#endif // NK_TARGET_NEON
|
|
362
|
-
#endif //
|
|
362
|
+
#endif // NK_TARGET_ARM64_
|
|
363
363
|
#endif // NK_SET_NEON_H
|
|
@@ -40,7 +40,7 @@
|
|
|
40
40
|
#ifndef NK_SET_POWERVSX_H
|
|
41
41
|
#define NK_SET_POWERVSX_H
|
|
42
42
|
|
|
43
|
-
#if
|
|
43
|
+
#if NK_TARGET_POWER64_
|
|
44
44
|
#if NK_TARGET_POWERVSX
|
|
45
45
|
|
|
46
46
|
#include "numkong/types.h"
|
|
@@ -322,5 +322,5 @@ NK_INTERNAL void nk_jaccard_f32x4_from_dot_powervsx_(nk_b128_vec_t dots, nk_u32_
|
|
|
322
322
|
#endif
|
|
323
323
|
|
|
324
324
|
#endif // NK_TARGET_POWERVSX
|
|
325
|
-
#endif //
|
|
325
|
+
#endif // NK_TARGET_POWER64_
|
|
326
326
|
#endif // NK_SET_POWERVSX_H
|
|
@@ -33,7 +33,7 @@
|
|
|
33
33
|
#ifndef NK_SET_RVV_H
|
|
34
34
|
#define NK_SET_RVV_H
|
|
35
35
|
|
|
36
|
-
#if
|
|
36
|
+
#if NK_TARGET_RISCV64_
|
|
37
37
|
#if NK_TARGET_RVV
|
|
38
38
|
|
|
39
39
|
#include "numkong/types.h"
|
|
@@ -222,5 +222,5 @@ NK_PUBLIC void nk_jaccard_u16_rvv(nk_u16_t const *a, nk_u16_t const *b, nk_size_
|
|
|
222
222
|
#endif
|
|
223
223
|
|
|
224
224
|
#endif // NK_TARGET_RVV
|
|
225
|
-
#endif //
|
|
225
|
+
#endif // NK_TARGET_RISCV64_
|
|
226
226
|
#endif // NK_SET_RVV_H
|
|
@@ -18,7 +18,7 @@
|
|
|
18
18
|
#ifndef NK_SET_RVVBB_H
|
|
19
19
|
#define NK_SET_RVVBB_H
|
|
20
20
|
|
|
21
|
-
#if
|
|
21
|
+
#if NK_TARGET_RISCV64_
|
|
22
22
|
#if NK_TARGET_RVVBB
|
|
23
23
|
|
|
24
24
|
#include "numkong/types.h"
|
|
@@ -113,5 +113,5 @@ NK_PUBLIC void nk_jaccard_u1_rvvbb(nk_u1x8_t const *a, nk_u1x8_t const *b, nk_si
|
|
|
113
113
|
#endif
|
|
114
114
|
|
|
115
115
|
#endif // NK_TARGET_RVVBB
|
|
116
|
-
#endif //
|
|
116
|
+
#endif // NK_TARGET_RISCV64_
|
|
117
117
|
#endif // NK_SET_RVVBB_H
|
|
@@ -29,7 +29,7 @@
|
|
|
29
29
|
#ifndef NK_SET_SVE_H
|
|
30
30
|
#define NK_SET_SVE_H
|
|
31
31
|
|
|
32
|
-
#if
|
|
32
|
+
#if NK_TARGET_ARM64_
|
|
33
33
|
#if NK_TARGET_SVE
|
|
34
34
|
|
|
35
35
|
#include "numkong/types.h" // `nk_u1x8_t`
|
|
@@ -182,5 +182,5 @@ NK_PUBLIC void nk_jaccard_u16_sve(nk_u16_t const *a, nk_u16_t const *b, nk_size_
|
|
|
182
182
|
#endif
|
|
183
183
|
|
|
184
184
|
#endif // NK_TARGET_SVE
|
|
185
|
-
#endif //
|
|
185
|
+
#endif // NK_TARGET_ARM64_
|
|
186
186
|
#endif // NK_SET_SVE_H
|
|
@@ -9,7 +9,7 @@
|
|
|
9
9
|
#ifndef NK_SETS_HASWELL_H
|
|
10
10
|
#define NK_SETS_HASWELL_H
|
|
11
11
|
|
|
12
|
-
#if
|
|
12
|
+
#if NK_TARGET_X8664_
|
|
13
13
|
#if NK_TARGET_HASWELL
|
|
14
14
|
|
|
15
15
|
#include "numkong/set/haswell.h"
|
|
@@ -59,5 +59,5 @@ nk_define_cross_normalized_symmetric_(jaccard, u1, haswell, u1x8, u32, /*norm_va
|
|
|
59
59
|
#endif
|
|
60
60
|
|
|
61
61
|
#endif // NK_TARGET_HASWELL
|
|
62
|
-
#endif //
|
|
62
|
+
#endif // NK_TARGET_X8664_
|
|
63
63
|
#endif // NK_SETS_HASWELL_H
|
|
@@ -9,7 +9,7 @@
|
|
|
9
9
|
#ifndef NK_SETS_ICELAKE_H
|
|
10
10
|
#define NK_SETS_ICELAKE_H
|
|
11
11
|
|
|
12
|
-
#if
|
|
12
|
+
#if NK_TARGET_X8664_
|
|
13
13
|
#if NK_TARGET_ICELAKE
|
|
14
14
|
|
|
15
15
|
#include "numkong/set/icelake.h"
|
|
@@ -62,5 +62,5 @@ nk_define_cross_normalized_symmetric_(jaccard, u1, icelake, u1x8, u32, /*norm_va
|
|
|
62
62
|
#endif
|
|
63
63
|
|
|
64
64
|
#endif // NK_TARGET_ICELAKE
|
|
65
|
-
#endif //
|
|
65
|
+
#endif // NK_TARGET_X8664_
|
|
66
66
|
#endif // NK_SETS_ICELAKE_H
|
|
@@ -9,7 +9,7 @@
|
|
|
9
9
|
#ifndef NK_SETS_LOONGSONASX_H
|
|
10
10
|
#define NK_SETS_LOONGSONASX_H
|
|
11
11
|
|
|
12
|
-
#if
|
|
12
|
+
#if NK_TARGET_LOONGARCH64_
|
|
13
13
|
#if NK_TARGET_LOONGSONASX
|
|
14
14
|
|
|
15
15
|
#include "numkong/set/loongsonasx.h"
|
|
@@ -48,5 +48,5 @@ nk_define_cross_normalized_symmetric_(jaccard, u1, loongsonasx, u1x8, u32, /*nor
|
|
|
48
48
|
#endif
|
|
49
49
|
|
|
50
50
|
#endif // NK_TARGET_LOONGSONASX
|
|
51
|
-
#endif //
|
|
51
|
+
#endif // NK_TARGET_LOONGARCH64_
|
|
52
52
|
#endif // NK_SETS_LOONGSONASX_H
|
|
@@ -9,7 +9,7 @@
|
|
|
9
9
|
#ifndef NK_SETS_NEON_H
|
|
10
10
|
#define NK_SETS_NEON_H
|
|
11
11
|
|
|
12
|
-
#if
|
|
12
|
+
#if NK_TARGET_ARM64_
|
|
13
13
|
#if NK_TARGET_NEON
|
|
14
14
|
|
|
15
15
|
#include "numkong/set/neon.h"
|
|
@@ -57,5 +57,5 @@ nk_define_cross_normalized_symmetric_(jaccard, u1, neon, u1x8, u32, /*norm_value
|
|
|
57
57
|
#endif
|
|
58
58
|
|
|
59
59
|
#endif // NK_TARGET_NEON
|
|
60
|
-
#endif //
|
|
60
|
+
#endif // NK_TARGET_ARM64_
|
|
61
61
|
#endif // NK_SETS_NEON_H
|