numkong 7.4.2 → 7.4.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +84 -84
- package/c/numkong.c +1 -1
- package/include/numkong/attention/sapphireamx.h +2 -2
- package/include/numkong/attention/sme.h +2 -2
- package/include/numkong/capabilities.h +47 -47
- package/include/numkong/cast/diamond.h +2 -2
- package/include/numkong/cast/haswell.h +2 -2
- package/include/numkong/cast/icelake.h +2 -2
- package/include/numkong/cast/loongsonasx.h +2 -2
- package/include/numkong/cast/neon.h +2 -2
- package/include/numkong/cast/powervsx.h +2 -2
- package/include/numkong/cast/rvv.h +2 -2
- package/include/numkong/cast/sapphire.h +2 -2
- package/include/numkong/cast/skylake.h +2 -2
- package/include/numkong/curved/genoa.h +2 -2
- package/include/numkong/curved/haswell.h +2 -2
- package/include/numkong/curved/neon.h +2 -2
- package/include/numkong/curved/neonbfdot.h +2 -2
- package/include/numkong/curved/rvv.h +2 -2
- package/include/numkong/curved/skylake.h +2 -2
- package/include/numkong/curved/smef64.h +2 -2
- package/include/numkong/dot/alder.h +2 -2
- package/include/numkong/dot/diamond.h +2 -2
- package/include/numkong/dot/genoa.h +2 -2
- package/include/numkong/dot/haswell.h +2 -2
- package/include/numkong/dot/icelake.h +2 -2
- package/include/numkong/dot/loongsonasx.h +2 -2
- package/include/numkong/dot/neon.h +2 -2
- package/include/numkong/dot/neonbfdot.h +2 -2
- package/include/numkong/dot/neonfhm.h +2 -2
- package/include/numkong/dot/neonfp8.h +2 -2
- package/include/numkong/dot/neonsdot.h +2 -2
- package/include/numkong/dot/rvv.h +2 -2
- package/include/numkong/dot/rvvbb.h +2 -2
- package/include/numkong/dot/rvvbf16.h +2 -2
- package/include/numkong/dot/rvvhalf.h +2 -2
- package/include/numkong/dot/sapphire.h +2 -2
- package/include/numkong/dot/sierra.h +2 -2
- package/include/numkong/dot/skylake.h +2 -2
- package/include/numkong/dot/sve.h +2 -2
- package/include/numkong/dot/svebfdot.h +2 -2
- package/include/numkong/dot/svehalf.h +2 -2
- package/include/numkong/dot/svesdot.h +2 -2
- package/include/numkong/dots/alder.h +2 -2
- package/include/numkong/dots/diamond.h +2 -2
- package/include/numkong/dots/genoa.h +2 -2
- package/include/numkong/dots/haswell.h +2 -2
- package/include/numkong/dots/icelake.h +2 -2
- package/include/numkong/dots/loongsonasx.h +2 -2
- package/include/numkong/dots/neon.h +2 -2
- package/include/numkong/dots/neonbfdot.h +2 -2
- package/include/numkong/dots/neonfhm.h +2 -2
- package/include/numkong/dots/neonfp8.h +2 -2
- package/include/numkong/dots/neonsdot.h +2 -2
- package/include/numkong/dots/powervsx.h +2 -2
- package/include/numkong/dots/rvv.h +2 -2
- package/include/numkong/dots/sapphireamx.h +2 -2
- package/include/numkong/dots/sierra.h +2 -2
- package/include/numkong/dots/skylake.h +2 -2
- package/include/numkong/dots/sme.h +10 -10
- package/include/numkong/dots/smebi32.h +2 -2
- package/include/numkong/dots/smef64.h +2 -2
- package/include/numkong/dots/smehalf.h +2 -2
- package/include/numkong/each/haswell.h +2 -2
- package/include/numkong/each/icelake.h +2 -2
- package/include/numkong/each/neon.h +2 -2
- package/include/numkong/each/neonbfdot.h +2 -2
- package/include/numkong/each/neonhalf.h +2 -2
- package/include/numkong/each/rvv.h +2 -2
- package/include/numkong/each/sapphire.h +2 -2
- package/include/numkong/each/skylake.h +2 -2
- package/include/numkong/geospatial/haswell.h +2 -2
- package/include/numkong/geospatial/neon.h +2 -2
- package/include/numkong/geospatial/rvv.h +2 -2
- package/include/numkong/geospatial/skylake.h +2 -2
- package/include/numkong/maxsim/alder.h +2 -2
- package/include/numkong/maxsim/genoa.h +2 -2
- package/include/numkong/maxsim/haswell.h +2 -2
- package/include/numkong/maxsim/icelake.h +2 -2
- package/include/numkong/maxsim/neonsdot.h +2 -2
- package/include/numkong/maxsim/sapphireamx.h +2 -2
- package/include/numkong/maxsim/sme.h +2 -2
- package/include/numkong/mesh/haswell.h +2 -2
- package/include/numkong/mesh/neon.h +2 -2
- package/include/numkong/mesh/neonbfdot.h +2 -2
- package/include/numkong/mesh/rvv.h +2 -2
- package/include/numkong/mesh/skylake.h +2 -2
- package/include/numkong/numkong.h +1 -1
- package/include/numkong/probability/haswell.h +2 -2
- package/include/numkong/probability/neon.h +2 -2
- package/include/numkong/probability/rvv.h +2 -2
- package/include/numkong/probability/skylake.h +2 -2
- package/include/numkong/reduce/alder.h +2 -2
- package/include/numkong/reduce/genoa.h +2 -2
- package/include/numkong/reduce/haswell.h +2 -2
- package/include/numkong/reduce/icelake.h +2 -2
- package/include/numkong/reduce/neon.h +2 -2
- package/include/numkong/reduce/neonbfdot.h +2 -2
- package/include/numkong/reduce/neonfhm.h +2 -2
- package/include/numkong/reduce/neonsdot.h +2 -2
- package/include/numkong/reduce/rvv.h +2 -2
- package/include/numkong/reduce/sierra.h +2 -2
- package/include/numkong/reduce/skylake.h +2 -2
- package/include/numkong/scalar/haswell.h +2 -2
- package/include/numkong/scalar/loongsonasx.h +2 -2
- package/include/numkong/scalar/neon.h +2 -2
- package/include/numkong/scalar/neonhalf.h +2 -2
- package/include/numkong/scalar/powervsx.h +2 -2
- package/include/numkong/scalar/rvv.h +2 -2
- package/include/numkong/scalar/sapphire.h +2 -2
- package/include/numkong/set/haswell.h +2 -2
- package/include/numkong/set/icelake.h +2 -2
- package/include/numkong/set/loongsonasx.h +2 -2
- package/include/numkong/set/neon.h +2 -2
- package/include/numkong/set/powervsx.h +2 -2
- package/include/numkong/set/rvv.h +2 -2
- package/include/numkong/set/rvvbb.h +2 -2
- package/include/numkong/set/sve.h +2 -2
- package/include/numkong/sets/haswell.h +2 -2
- package/include/numkong/sets/icelake.h +2 -2
- package/include/numkong/sets/loongsonasx.h +2 -2
- package/include/numkong/sets/neon.h +2 -2
- package/include/numkong/sets/powervsx.h +2 -2
- package/include/numkong/sets/smebi32.h +2 -2
- package/include/numkong/sparse/icelake.h +2 -2
- package/include/numkong/sparse/neon.h +2 -2
- package/include/numkong/sparse/sve2.h +2 -2
- package/include/numkong/sparse/turin.h +2 -2
- package/include/numkong/spatial/alder.h +2 -2
- package/include/numkong/spatial/diamond.h +2 -2
- package/include/numkong/spatial/genoa.h +2 -2
- package/include/numkong/spatial/haswell.h +2 -2
- package/include/numkong/spatial/icelake.h +2 -2
- package/include/numkong/spatial/loongsonasx.h +2 -2
- package/include/numkong/spatial/neon.h +2 -2
- package/include/numkong/spatial/neonbfdot.h +2 -2
- package/include/numkong/spatial/neonfp8.h +2 -2
- package/include/numkong/spatial/neonsdot.h +2 -2
- package/include/numkong/spatial/powervsx.h +2 -2
- package/include/numkong/spatial/rvv.h +2 -2
- package/include/numkong/spatial/rvvbf16.h +2 -2
- package/include/numkong/spatial/rvvhalf.h +2 -2
- package/include/numkong/spatial/sierra.h +2 -2
- package/include/numkong/spatial/skylake.h +2 -2
- package/include/numkong/spatial/sve.h +2 -2
- package/include/numkong/spatial/svebfdot.h +2 -2
- package/include/numkong/spatial/svehalf.h +2 -2
- package/include/numkong/spatial/svesdot.h +2 -2
- package/include/numkong/spatials/alder.h +2 -2
- package/include/numkong/spatials/diamond.h +2 -2
- package/include/numkong/spatials/genoa.h +2 -2
- package/include/numkong/spatials/haswell.h +2 -2
- package/include/numkong/spatials/icelake.h +2 -2
- package/include/numkong/spatials/loongsonasx.h +2 -2
- package/include/numkong/spatials/neon.h +2 -2
- package/include/numkong/spatials/neonbfdot.h +2 -2
- package/include/numkong/spatials/neonfhm.h +2 -2
- package/include/numkong/spatials/neonfp8.h +2 -2
- package/include/numkong/spatials/neonsdot.h +2 -2
- package/include/numkong/spatials/powervsx.h +2 -2
- package/include/numkong/spatials/rvv.h +2 -2
- package/include/numkong/spatials/sapphireamx.h +2 -2
- package/include/numkong/spatials/sierra.h +2 -2
- package/include/numkong/spatials/skylake.h +2 -2
- package/include/numkong/spatials/sme.h +2 -2
- package/include/numkong/spatials/smef64.h +2 -2
- package/include/numkong/trigonometry/haswell.h +2 -2
- package/include/numkong/trigonometry/neon.h +2 -2
- package/include/numkong/trigonometry/rvv.h +2 -2
- package/include/numkong/trigonometry/skylake.h +2 -2
- package/include/numkong/types.h +88 -80
- package/package.json +7 -7
|
@@ -11,7 +11,7 @@
|
|
|
11
11
|
#ifndef NK_NUMKONG_H
|
|
12
12
|
#define NK_NUMKONG_H
|
|
13
13
|
|
|
14
|
-
#include "numkong/capabilities.h" // Runtime detection, like `
|
|
14
|
+
#include "numkong/capabilities.h" // Runtime detection, like `nk_capabilities_x8664_`
|
|
15
15
|
#include "numkong/scalar.h" // Scalar math: sqrt, rsqrt, fma, saturating, order, like `nk_f32_sqrt`
|
|
16
16
|
#include "numkong/cast.h" // Type conversions, like `nk_cast`
|
|
17
17
|
#include "numkong/set.h" // Hamming, Jaccard, like `nk_hamming_u1`
|
|
@@ -9,7 +9,7 @@
|
|
|
9
9
|
#ifndef NK_PROBABILITY_HASWELL_H
|
|
10
10
|
#define NK_PROBABILITY_HASWELL_H
|
|
11
11
|
|
|
12
|
-
#if
|
|
12
|
+
#if NK_TARGET_X8664_
|
|
13
13
|
#if NK_TARGET_HASWELL
|
|
14
14
|
|
|
15
15
|
#include "numkong/types.h"
|
|
@@ -263,5 +263,5 @@ nk_jsd_f64_haswell_cycle:
|
|
|
263
263
|
#endif
|
|
264
264
|
|
|
265
265
|
#endif // NK_TARGET_HASWELL
|
|
266
|
-
#endif //
|
|
266
|
+
#endif // NK_TARGET_X8664_
|
|
267
267
|
#endif // NK_PROBABILITY_HASWELL_H
|
|
@@ -9,7 +9,7 @@
|
|
|
9
9
|
#ifndef NK_PROBABILITY_NEON_H
|
|
10
10
|
#define NK_PROBABILITY_NEON_H
|
|
11
11
|
|
|
12
|
-
#if
|
|
12
|
+
#if NK_TARGET_ARM64_
|
|
13
13
|
|
|
14
14
|
#include "numkong/types.h"
|
|
15
15
|
#include "numkong/cast/serial.h" // `nk_partial_load_b16x4_serial_`, `nk_partial_load_b32x4_serial_`
|
|
@@ -251,5 +251,5 @@ nk_jsd_f16_neon_cycle:
|
|
|
251
251
|
} // extern "C"
|
|
252
252
|
#endif
|
|
253
253
|
|
|
254
|
-
#endif //
|
|
254
|
+
#endif // NK_TARGET_ARM64_
|
|
255
255
|
#endif // NK_PROBABILITY_NEON_H
|
|
@@ -17,7 +17,7 @@
|
|
|
17
17
|
#ifndef NK_PROBABILITY_RVV_H
|
|
18
18
|
#define NK_PROBABILITY_RVV_H
|
|
19
19
|
|
|
20
|
-
#if
|
|
20
|
+
#if NK_TARGET_RISCV64_
|
|
21
21
|
#if NK_TARGET_RVV
|
|
22
22
|
|
|
23
23
|
#include "numkong/types.h"
|
|
@@ -410,5 +410,5 @@ NK_PUBLIC void nk_jsd_bf16_rvv(nk_bf16_t const *a, nk_bf16_t const *b, nk_size_t
|
|
|
410
410
|
#endif
|
|
411
411
|
|
|
412
412
|
#endif // NK_TARGET_RVV
|
|
413
|
-
#endif //
|
|
413
|
+
#endif // NK_TARGET_RISCV64_
|
|
414
414
|
#endif // NK_PROBABILITY_RVV_H
|
|
@@ -9,7 +9,7 @@
|
|
|
9
9
|
#ifndef NK_PROBABILITY_SKYLAKE_H
|
|
10
10
|
#define NK_PROBABILITY_SKYLAKE_H
|
|
11
11
|
|
|
12
|
-
#if
|
|
12
|
+
#if NK_TARGET_X8664_
|
|
13
13
|
#if NK_TARGET_SKYLAKE
|
|
14
14
|
|
|
15
15
|
#include "numkong/types.h"
|
|
@@ -320,5 +320,5 @@ nk_jsd_f16_skylake_cycle:
|
|
|
320
320
|
#endif
|
|
321
321
|
|
|
322
322
|
#endif // NK_TARGET_SKYLAKE
|
|
323
|
-
#endif //
|
|
323
|
+
#endif // NK_TARGET_X8664_
|
|
324
324
|
#endif // NK_PROBABILITY_SKYLAKE_H
|
|
@@ -14,7 +14,7 @@
|
|
|
14
14
|
#ifndef NK_REDUCE_ALDER_H
|
|
15
15
|
#define NK_REDUCE_ALDER_H
|
|
16
16
|
|
|
17
|
-
#if
|
|
17
|
+
#if NK_TARGET_X8664_
|
|
18
18
|
#if NK_TARGET_ALDER
|
|
19
19
|
|
|
20
20
|
#include "numkong/types.h"
|
|
@@ -632,5 +632,5 @@ NK_PUBLIC void nk_reduce_moments_e2m3_alder( //
|
|
|
632
632
|
#endif
|
|
633
633
|
|
|
634
634
|
#endif // NK_TARGET_ALDER
|
|
635
|
-
#endif //
|
|
635
|
+
#endif // NK_TARGET_X8664_
|
|
636
636
|
#endif // NK_REDUCE_ALDER_H
|
|
@@ -18,7 +18,7 @@
|
|
|
18
18
|
#ifndef NK_REDUCE_GENOA_H
|
|
19
19
|
#define NK_REDUCE_GENOA_H
|
|
20
20
|
|
|
21
|
-
#if
|
|
21
|
+
#if NK_TARGET_X8664_
|
|
22
22
|
#if NK_TARGET_GENOA
|
|
23
23
|
|
|
24
24
|
#include "numkong/reduce/serial.h"
|
|
@@ -197,5 +197,5 @@ NK_PUBLIC void nk_reduce_moments_e5m2_genoa( //
|
|
|
197
197
|
#endif
|
|
198
198
|
|
|
199
199
|
#endif // NK_TARGET_GENOA
|
|
200
|
-
#endif //
|
|
200
|
+
#endif // NK_TARGET_X8664_
|
|
201
201
|
#endif // NK_REDUCE_GENOA_H
|
|
@@ -27,7 +27,7 @@
|
|
|
27
27
|
#ifndef NK_REDUCE_HASWELL_H
|
|
28
28
|
#define NK_REDUCE_HASWELL_H
|
|
29
29
|
|
|
30
|
-
#if
|
|
30
|
+
#if NK_TARGET_X8664_
|
|
31
31
|
#if NK_TARGET_HASWELL
|
|
32
32
|
|
|
33
33
|
#include "numkong/types.h"
|
|
@@ -3781,5 +3781,5 @@ NK_PUBLIC void nk_reduce_moments_u1_haswell( //
|
|
|
3781
3781
|
#endif
|
|
3782
3782
|
|
|
3783
3783
|
#endif // NK_TARGET_HASWELL
|
|
3784
|
-
#endif //
|
|
3784
|
+
#endif // NK_TARGET_X8664_
|
|
3785
3785
|
#endif // NK_REDUCE_HASWELL_H
|
|
@@ -15,7 +15,7 @@
|
|
|
15
15
|
#ifndef NK_REDUCE_ICELAKE_H
|
|
16
16
|
#define NK_REDUCE_ICELAKE_H
|
|
17
17
|
|
|
18
|
-
#if
|
|
18
|
+
#if NK_TARGET_X8664_
|
|
19
19
|
#if NK_TARGET_ICELAKE
|
|
20
20
|
|
|
21
21
|
#include "numkong/reduce/serial.h"
|
|
@@ -545,5 +545,5 @@ NK_PUBLIC void nk_reduce_moments_e3m2_icelake( //
|
|
|
545
545
|
#endif
|
|
546
546
|
|
|
547
547
|
#endif // NK_TARGET_ICELAKE
|
|
548
|
-
#endif //
|
|
548
|
+
#endif // NK_TARGET_X8664_
|
|
549
549
|
#endif // NK_REDUCE_ICELAKE_H
|
|
@@ -9,7 +9,7 @@
|
|
|
9
9
|
#ifndef NK_REDUCE_NEON_H
|
|
10
10
|
#define NK_REDUCE_NEON_H
|
|
11
11
|
|
|
12
|
-
#if
|
|
12
|
+
#if NK_TARGET_ARM64_
|
|
13
13
|
#if NK_TARGET_NEON
|
|
14
14
|
|
|
15
15
|
#include "numkong/types.h" // `nk_size_t`
|
|
@@ -3947,5 +3947,5 @@ NK_PUBLIC void nk_reduce_moments_f16_neon( //
|
|
|
3947
3947
|
#endif
|
|
3948
3948
|
|
|
3949
3949
|
#endif // NK_TARGET_NEON
|
|
3950
|
-
#endif //
|
|
3950
|
+
#endif // NK_TARGET_ARM64_
|
|
3951
3951
|
#endif // NK_REDUCE_NEON_H
|
|
@@ -9,7 +9,7 @@
|
|
|
9
9
|
#ifndef NK_REDUCE_NEONBFDOT_H
|
|
10
10
|
#define NK_REDUCE_NEONBFDOT_H
|
|
11
11
|
|
|
12
|
-
#if
|
|
12
|
+
#if NK_TARGET_ARM64_
|
|
13
13
|
#if NK_TARGET_NEONBFDOT
|
|
14
14
|
|
|
15
15
|
#include "numkong/types.h" // `nk_bf16_t`
|
|
@@ -138,5 +138,5 @@ NK_PUBLIC void nk_reduce_moments_bf16_neonbfdot( //
|
|
|
138
138
|
#endif
|
|
139
139
|
|
|
140
140
|
#endif // NK_TARGET_NEONBFDOT
|
|
141
|
-
#endif //
|
|
141
|
+
#endif // NK_TARGET_ARM64_
|
|
142
142
|
#endif // NK_REDUCE_NEONBFDOT_H
|
|
@@ -9,7 +9,7 @@
|
|
|
9
9
|
#ifndef NK_REDUCE_NEONFHM_H
|
|
10
10
|
#define NK_REDUCE_NEONFHM_H
|
|
11
11
|
|
|
12
|
-
#if
|
|
12
|
+
#if NK_TARGET_ARM64_
|
|
13
13
|
#if NK_TARGET_NEONFHM
|
|
14
14
|
|
|
15
15
|
#include "numkong/types.h" // `nk_e4m3_t`
|
|
@@ -289,5 +289,5 @@ NK_PUBLIC void nk_reduce_moments_e5m2_neonfhm( //
|
|
|
289
289
|
#endif
|
|
290
290
|
|
|
291
291
|
#endif // NK_TARGET_NEONFHM
|
|
292
|
-
#endif //
|
|
292
|
+
#endif // NK_TARGET_ARM64_
|
|
293
293
|
#endif // NK_REDUCE_NEONFHM_H
|
|
@@ -9,7 +9,7 @@
|
|
|
9
9
|
#ifndef NK_REDUCE_NEONSDOT_H
|
|
10
10
|
#define NK_REDUCE_NEONSDOT_H
|
|
11
11
|
|
|
12
|
-
#if
|
|
12
|
+
#if NK_TARGET_ARM64_
|
|
13
13
|
#if NK_TARGET_NEONSDOT
|
|
14
14
|
|
|
15
15
|
#include "numkong/types.h"
|
|
@@ -353,5 +353,5 @@ NK_PUBLIC void nk_reduce_moments_e2m3_neonsdot( //
|
|
|
353
353
|
#endif
|
|
354
354
|
|
|
355
355
|
#endif // NK_TARGET_NEONSDOT
|
|
356
|
-
#endif //
|
|
356
|
+
#endif // NK_TARGET_ARM64_
|
|
357
357
|
#endif // NK_REDUCE_NEONSDOT_H
|
|
@@ -9,7 +9,7 @@
|
|
|
9
9
|
#ifndef NK_REDUCE_RVV_H
|
|
10
10
|
#define NK_REDUCE_RVV_H
|
|
11
11
|
|
|
12
|
-
#if
|
|
12
|
+
#if NK_TARGET_RISCV64_
|
|
13
13
|
#if NK_TARGET_RVV
|
|
14
14
|
|
|
15
15
|
#include "numkong/types.h"
|
|
@@ -3529,5 +3529,5 @@ NK_PUBLIC void nk_reduce_minmax_e3m2_rvv( //
|
|
|
3529
3529
|
#endif
|
|
3530
3530
|
|
|
3531
3531
|
#endif // NK_TARGET_RVV
|
|
3532
|
-
#endif //
|
|
3532
|
+
#endif // NK_TARGET_RISCV64_
|
|
3533
3533
|
#endif // NK_REDUCE_RVV_H
|
|
@@ -13,7 +13,7 @@
|
|
|
13
13
|
#ifndef NK_REDUCE_SIERRA_H
|
|
14
14
|
#define NK_REDUCE_SIERRA_H
|
|
15
15
|
|
|
16
|
-
#if
|
|
16
|
+
#if NK_TARGET_X8664_
|
|
17
17
|
#if NK_TARGET_SIERRA
|
|
18
18
|
|
|
19
19
|
#include "numkong/types.h"
|
|
@@ -334,5 +334,5 @@ NK_PUBLIC void nk_reduce_moments_e2m3_sierra( //
|
|
|
334
334
|
#endif
|
|
335
335
|
|
|
336
336
|
#endif // NK_TARGET_SIERRA
|
|
337
|
-
#endif //
|
|
337
|
+
#endif // NK_TARGET_X8664_
|
|
338
338
|
#endif // NK_REDUCE_SIERRA_H
|
|
@@ -35,7 +35,7 @@
|
|
|
35
35
|
#ifndef NK_REDUCE_SKYLAKE_H
|
|
36
36
|
#define NK_REDUCE_SKYLAKE_H
|
|
37
37
|
|
|
38
|
-
#if
|
|
38
|
+
#if NK_TARGET_X8664_
|
|
39
39
|
#if NK_TARGET_SKYLAKE
|
|
40
40
|
|
|
41
41
|
#include "numkong/types.h"
|
|
@@ -3790,5 +3790,5 @@ NK_PUBLIC void nk_reduce_minmax_f16_skylake( //
|
|
|
3790
3790
|
#endif
|
|
3791
3791
|
|
|
3792
3792
|
#endif // NK_TARGET_SKYLAKE
|
|
3793
|
-
#endif //
|
|
3793
|
+
#endif // NK_TARGET_X8664_
|
|
3794
3794
|
#endif // NK_REDUCE_SKYLAKE_H
|
|
@@ -19,7 +19,7 @@
|
|
|
19
19
|
#ifndef NK_SCALAR_HASWELL_H
|
|
20
20
|
#define NK_SCALAR_HASWELL_H
|
|
21
21
|
|
|
22
|
-
#if
|
|
22
|
+
#if NK_TARGET_X8664_
|
|
23
23
|
#if NK_TARGET_HASWELL
|
|
24
24
|
|
|
25
25
|
#include "numkong/types.h"
|
|
@@ -118,5 +118,5 @@ NK_PUBLIC nk_i64_t nk_i64_saturating_mul_haswell(nk_i64_t a, nk_i64_t b) {
|
|
|
118
118
|
#endif
|
|
119
119
|
|
|
120
120
|
#endif // NK_TARGET_HASWELL
|
|
121
|
-
#endif //
|
|
121
|
+
#endif // NK_TARGET_X8664_
|
|
122
122
|
#endif // NK_SCALAR_HASWELL_H
|
|
@@ -14,7 +14,7 @@
|
|
|
14
14
|
#ifndef NK_SCALAR_LOONGSONASX_H
|
|
15
15
|
#define NK_SCALAR_LOONGSONASX_H
|
|
16
16
|
|
|
17
|
-
#if
|
|
17
|
+
#if NK_TARGET_LOONGARCH64_
|
|
18
18
|
#if NK_TARGET_LOONGSONASX
|
|
19
19
|
|
|
20
20
|
#include "numkong/types.h"
|
|
@@ -70,5 +70,5 @@ NK_PUBLIC nk_f64_t nk_f64_rsqrt_loongsonasx(nk_f64_t x) { return 1.0 / nk_f64_sq
|
|
|
70
70
|
#endif
|
|
71
71
|
|
|
72
72
|
#endif // NK_TARGET_LOONGSONASX
|
|
73
|
-
#endif //
|
|
73
|
+
#endif // NK_TARGET_LOONGARCH64_
|
|
74
74
|
#endif // NK_SCALAR_LOONGSONASX_H
|
|
@@ -19,7 +19,7 @@
|
|
|
19
19
|
#ifndef NK_SCALAR_NEON_H
|
|
20
20
|
#define NK_SCALAR_NEON_H
|
|
21
21
|
|
|
22
|
-
#if
|
|
22
|
+
#if NK_TARGET_ARM64_
|
|
23
23
|
#if NK_TARGET_NEON
|
|
24
24
|
|
|
25
25
|
#include "numkong/types.h"
|
|
@@ -118,5 +118,5 @@ NK_PUBLIC nk_i64_t nk_i64_saturating_mul_neon(nk_i64_t a, nk_i64_t b) {
|
|
|
118
118
|
#endif
|
|
119
119
|
|
|
120
120
|
#endif // NK_TARGET_NEON
|
|
121
|
-
#endif //
|
|
121
|
+
#endif // NK_TARGET_ARM64_
|
|
122
122
|
#endif // NK_SCALAR_NEON_H
|
|
@@ -13,7 +13,7 @@
|
|
|
13
13
|
#ifndef NK_SCALAR_NEONHALF_H
|
|
14
14
|
#define NK_SCALAR_NEONHALF_H
|
|
15
15
|
|
|
16
|
-
#if
|
|
16
|
+
#if NK_TARGET_ARM64_
|
|
17
17
|
#if NK_TARGET_NEONHALF
|
|
18
18
|
|
|
19
19
|
#include "numkong/types.h"
|
|
@@ -66,5 +66,5 @@ NK_PUBLIC nk_f16_t nk_f16_fma_neonhalf(nk_f16_t a, nk_f16_t b, nk_f16_t c) {
|
|
|
66
66
|
#endif
|
|
67
67
|
|
|
68
68
|
#endif // NK_TARGET_NEONHALF
|
|
69
|
-
#endif //
|
|
69
|
+
#endif // NK_TARGET_ARM64_
|
|
70
70
|
#endif // NK_SCALAR_NEONHALF_H
|
|
@@ -19,7 +19,7 @@
|
|
|
19
19
|
#ifndef NK_SCALAR_POWERVSX_H
|
|
20
20
|
#define NK_SCALAR_POWERVSX_H
|
|
21
21
|
|
|
22
|
-
#if
|
|
22
|
+
#if NK_TARGET_POWER64_
|
|
23
23
|
#if NK_TARGET_POWERVSX
|
|
24
24
|
|
|
25
25
|
#include "numkong/types.h"
|
|
@@ -92,5 +92,5 @@ NK_PUBLIC nk_f64_t nk_f64_fma_powervsx(nk_f64_t a, nk_f64_t b, nk_f64_t c) {
|
|
|
92
92
|
#endif
|
|
93
93
|
|
|
94
94
|
#endif // NK_TARGET_POWERVSX
|
|
95
|
-
#endif //
|
|
95
|
+
#endif // NK_TARGET_POWER64_
|
|
96
96
|
#endif // NK_SCALAR_POWERVSX_H
|
|
@@ -12,7 +12,7 @@
|
|
|
12
12
|
#ifndef NK_SCALAR_RVV_H
|
|
13
13
|
#define NK_SCALAR_RVV_H
|
|
14
14
|
|
|
15
|
-
#if
|
|
15
|
+
#if NK_TARGET_RISCV64_
|
|
16
16
|
#if NK_TARGET_RVV
|
|
17
17
|
|
|
18
18
|
#include "numkong/types.h"
|
|
@@ -207,5 +207,5 @@ NK_PUBLIC nk_i64_t nk_i64_saturating_mul_rvv(nk_i64_t a, nk_i64_t b) {
|
|
|
207
207
|
#endif
|
|
208
208
|
|
|
209
209
|
#endif // NK_TARGET_RVV
|
|
210
|
-
#endif //
|
|
210
|
+
#endif // NK_TARGET_RISCV64_
|
|
211
211
|
#endif // NK_SCALAR_RVV_H
|
|
@@ -11,7 +11,7 @@
|
|
|
11
11
|
#ifndef NK_SCALAR_SAPPHIRE_H
|
|
12
12
|
#define NK_SCALAR_SAPPHIRE_H
|
|
13
13
|
|
|
14
|
-
#if
|
|
14
|
+
#if NK_TARGET_X8664_
|
|
15
15
|
#if NK_TARGET_SAPPHIRE
|
|
16
16
|
|
|
17
17
|
#include "numkong/types.h"
|
|
@@ -70,5 +70,5 @@ NK_PUBLIC nk_f16_t nk_f16_fma_sapphire(nk_f16_t a, nk_f16_t b, nk_f16_t c) {
|
|
|
70
70
|
#endif
|
|
71
71
|
|
|
72
72
|
#endif // NK_TARGET_SAPPHIRE
|
|
73
|
-
#endif //
|
|
73
|
+
#endif // NK_TARGET_X8664_
|
|
74
74
|
#endif // NK_SCALAR_SAPPHIRE_H
|
|
@@ -38,7 +38,7 @@
|
|
|
38
38
|
#ifndef NK_SET_HASWELL_H
|
|
39
39
|
#define NK_SET_HASWELL_H
|
|
40
40
|
|
|
41
|
-
#if
|
|
41
|
+
#if NK_TARGET_X8664_
|
|
42
42
|
#if NK_TARGET_HASWELL
|
|
43
43
|
|
|
44
44
|
#include "numkong/types.h"
|
|
@@ -330,5 +330,5 @@ NK_INTERNAL void nk_jaccard_f32x4_from_dot_haswell_(nk_b128_vec_t dots, nk_u32_t
|
|
|
330
330
|
#endif
|
|
331
331
|
|
|
332
332
|
#endif // NK_TARGET_HASWELL
|
|
333
|
-
#endif //
|
|
333
|
+
#endif // NK_TARGET_X8664_
|
|
334
334
|
#endif // NK_SET_HASWELL_H
|
|
@@ -37,7 +37,7 @@
|
|
|
37
37
|
#ifndef NK_SET_ICELAKE_H
|
|
38
38
|
#define NK_SET_ICELAKE_H
|
|
39
39
|
|
|
40
|
-
#if
|
|
40
|
+
#if NK_TARGET_X8664_
|
|
41
41
|
#if NK_TARGET_ICELAKE
|
|
42
42
|
|
|
43
43
|
#include "numkong/types.h"
|
|
@@ -481,5 +481,5 @@ NK_INTERNAL void nk_jaccard_f32x4_from_dot_icelake_(nk_b128_vec_t dots, nk_u32_t
|
|
|
481
481
|
#endif
|
|
482
482
|
|
|
483
483
|
#endif // NK_TARGET_ICELAKE
|
|
484
|
-
#endif //
|
|
484
|
+
#endif // NK_TARGET_X8664_
|
|
485
485
|
#endif // NK_SET_ICELAKE_H
|
|
@@ -33,7 +33,7 @@
|
|
|
33
33
|
#ifndef NK_SET_LOONGSONASX_H
|
|
34
34
|
#define NK_SET_LOONGSONASX_H
|
|
35
35
|
|
|
36
|
-
#if
|
|
36
|
+
#if NK_TARGET_LOONGARCH64_
|
|
37
37
|
#if NK_TARGET_LOONGSONASX
|
|
38
38
|
|
|
39
39
|
#include "numkong/types.h"
|
|
@@ -177,5 +177,5 @@ NK_INTERNAL void nk_jaccard_f32x4_from_dot_loongsonasx_(nk_b128_vec_t dots, nk_u
|
|
|
177
177
|
#endif
|
|
178
178
|
|
|
179
179
|
#endif // NK_TARGET_LOONGSONASX
|
|
180
|
-
#endif //
|
|
180
|
+
#endif // NK_TARGET_LOONGARCH64_
|
|
181
181
|
#endif // NK_SET_LOONGSONASX_H
|
|
@@ -41,7 +41,7 @@
|
|
|
41
41
|
#ifndef NK_SET_NEON_H
|
|
42
42
|
#define NK_SET_NEON_H
|
|
43
43
|
|
|
44
|
-
#if
|
|
44
|
+
#if NK_TARGET_ARM64_
|
|
45
45
|
#if NK_TARGET_NEON
|
|
46
46
|
|
|
47
47
|
#include "numkong/types.h" // `nk_u1x8_t`
|
|
@@ -359,5 +359,5 @@ NK_INTERNAL void nk_jaccard_f32x4_from_dot_neon_(nk_b128_vec_t dots, nk_u32_t qu
|
|
|
359
359
|
#endif
|
|
360
360
|
|
|
361
361
|
#endif // NK_TARGET_NEON
|
|
362
|
-
#endif //
|
|
362
|
+
#endif // NK_TARGET_ARM64_
|
|
363
363
|
#endif // NK_SET_NEON_H
|
|
@@ -40,7 +40,7 @@
|
|
|
40
40
|
#ifndef NK_SET_POWERVSX_H
|
|
41
41
|
#define NK_SET_POWERVSX_H
|
|
42
42
|
|
|
43
|
-
#if
|
|
43
|
+
#if NK_TARGET_POWER64_
|
|
44
44
|
#if NK_TARGET_POWERVSX
|
|
45
45
|
|
|
46
46
|
#include "numkong/types.h"
|
|
@@ -322,5 +322,5 @@ NK_INTERNAL void nk_jaccard_f32x4_from_dot_powervsx_(nk_b128_vec_t dots, nk_u32_
|
|
|
322
322
|
#endif
|
|
323
323
|
|
|
324
324
|
#endif // NK_TARGET_POWERVSX
|
|
325
|
-
#endif //
|
|
325
|
+
#endif // NK_TARGET_POWER64_
|
|
326
326
|
#endif // NK_SET_POWERVSX_H
|
|
@@ -33,7 +33,7 @@
|
|
|
33
33
|
#ifndef NK_SET_RVV_H
|
|
34
34
|
#define NK_SET_RVV_H
|
|
35
35
|
|
|
36
|
-
#if
|
|
36
|
+
#if NK_TARGET_RISCV64_
|
|
37
37
|
#if NK_TARGET_RVV
|
|
38
38
|
|
|
39
39
|
#include "numkong/types.h"
|
|
@@ -222,5 +222,5 @@ NK_PUBLIC void nk_jaccard_u16_rvv(nk_u16_t const *a, nk_u16_t const *b, nk_size_
|
|
|
222
222
|
#endif
|
|
223
223
|
|
|
224
224
|
#endif // NK_TARGET_RVV
|
|
225
|
-
#endif //
|
|
225
|
+
#endif // NK_TARGET_RISCV64_
|
|
226
226
|
#endif // NK_SET_RVV_H
|
|
@@ -18,7 +18,7 @@
|
|
|
18
18
|
#ifndef NK_SET_RVVBB_H
|
|
19
19
|
#define NK_SET_RVVBB_H
|
|
20
20
|
|
|
21
|
-
#if
|
|
21
|
+
#if NK_TARGET_RISCV64_
|
|
22
22
|
#if NK_TARGET_RVVBB
|
|
23
23
|
|
|
24
24
|
#include "numkong/types.h"
|
|
@@ -113,5 +113,5 @@ NK_PUBLIC void nk_jaccard_u1_rvvbb(nk_u1x8_t const *a, nk_u1x8_t const *b, nk_si
|
|
|
113
113
|
#endif
|
|
114
114
|
|
|
115
115
|
#endif // NK_TARGET_RVVBB
|
|
116
|
-
#endif //
|
|
116
|
+
#endif // NK_TARGET_RISCV64_
|
|
117
117
|
#endif // NK_SET_RVVBB_H
|
|
@@ -29,7 +29,7 @@
|
|
|
29
29
|
#ifndef NK_SET_SVE_H
|
|
30
30
|
#define NK_SET_SVE_H
|
|
31
31
|
|
|
32
|
-
#if
|
|
32
|
+
#if NK_TARGET_ARM64_
|
|
33
33
|
#if NK_TARGET_SVE
|
|
34
34
|
|
|
35
35
|
#include "numkong/types.h" // `nk_u1x8_t`
|
|
@@ -182,5 +182,5 @@ NK_PUBLIC void nk_jaccard_u16_sve(nk_u16_t const *a, nk_u16_t const *b, nk_size_
|
|
|
182
182
|
#endif
|
|
183
183
|
|
|
184
184
|
#endif // NK_TARGET_SVE
|
|
185
|
-
#endif //
|
|
185
|
+
#endif // NK_TARGET_ARM64_
|
|
186
186
|
#endif // NK_SET_SVE_H
|
|
@@ -9,7 +9,7 @@
|
|
|
9
9
|
#ifndef NK_SETS_HASWELL_H
|
|
10
10
|
#define NK_SETS_HASWELL_H
|
|
11
11
|
|
|
12
|
-
#if
|
|
12
|
+
#if NK_TARGET_X8664_
|
|
13
13
|
#if NK_TARGET_HASWELL
|
|
14
14
|
|
|
15
15
|
#include "numkong/set/haswell.h"
|
|
@@ -59,5 +59,5 @@ nk_define_cross_normalized_symmetric_(jaccard, u1, haswell, u1x8, u32, /*norm_va
|
|
|
59
59
|
#endif
|
|
60
60
|
|
|
61
61
|
#endif // NK_TARGET_HASWELL
|
|
62
|
-
#endif //
|
|
62
|
+
#endif // NK_TARGET_X8664_
|
|
63
63
|
#endif // NK_SETS_HASWELL_H
|
|
@@ -9,7 +9,7 @@
|
|
|
9
9
|
#ifndef NK_SETS_ICELAKE_H
|
|
10
10
|
#define NK_SETS_ICELAKE_H
|
|
11
11
|
|
|
12
|
-
#if
|
|
12
|
+
#if NK_TARGET_X8664_
|
|
13
13
|
#if NK_TARGET_ICELAKE
|
|
14
14
|
|
|
15
15
|
#include "numkong/set/icelake.h"
|
|
@@ -62,5 +62,5 @@ nk_define_cross_normalized_symmetric_(jaccard, u1, icelake, u1x8, u32, /*norm_va
|
|
|
62
62
|
#endif
|
|
63
63
|
|
|
64
64
|
#endif // NK_TARGET_ICELAKE
|
|
65
|
-
#endif //
|
|
65
|
+
#endif // NK_TARGET_X8664_
|
|
66
66
|
#endif // NK_SETS_ICELAKE_H
|
|
@@ -9,7 +9,7 @@
|
|
|
9
9
|
#ifndef NK_SETS_LOONGSONASX_H
|
|
10
10
|
#define NK_SETS_LOONGSONASX_H
|
|
11
11
|
|
|
12
|
-
#if
|
|
12
|
+
#if NK_TARGET_LOONGARCH64_
|
|
13
13
|
#if NK_TARGET_LOONGSONASX
|
|
14
14
|
|
|
15
15
|
#include "numkong/set/loongsonasx.h"
|
|
@@ -48,5 +48,5 @@ nk_define_cross_normalized_symmetric_(jaccard, u1, loongsonasx, u1x8, u32, /*nor
|
|
|
48
48
|
#endif
|
|
49
49
|
|
|
50
50
|
#endif // NK_TARGET_LOONGSONASX
|
|
51
|
-
#endif //
|
|
51
|
+
#endif // NK_TARGET_LOONGARCH64_
|
|
52
52
|
#endif // NK_SETS_LOONGSONASX_H
|
|
@@ -9,7 +9,7 @@
|
|
|
9
9
|
#ifndef NK_SETS_NEON_H
|
|
10
10
|
#define NK_SETS_NEON_H
|
|
11
11
|
|
|
12
|
-
#if
|
|
12
|
+
#if NK_TARGET_ARM64_
|
|
13
13
|
#if NK_TARGET_NEON
|
|
14
14
|
|
|
15
15
|
#include "numkong/set/neon.h"
|
|
@@ -57,5 +57,5 @@ nk_define_cross_normalized_symmetric_(jaccard, u1, neon, u1x8, u32, /*norm_value
|
|
|
57
57
|
#endif
|
|
58
58
|
|
|
59
59
|
#endif // NK_TARGET_NEON
|
|
60
|
-
#endif //
|
|
60
|
+
#endif // NK_TARGET_ARM64_
|
|
61
61
|
#endif // NK_SETS_NEON_H
|
|
@@ -9,7 +9,7 @@
|
|
|
9
9
|
#ifndef NK_SETS_POWERVSX_H
|
|
10
10
|
#define NK_SETS_POWERVSX_H
|
|
11
11
|
|
|
12
|
-
#if
|
|
12
|
+
#if NK_TARGET_POWER64_
|
|
13
13
|
#if NK_TARGET_POWERVSX
|
|
14
14
|
|
|
15
15
|
#include "numkong/set/powervsx.h"
|
|
@@ -61,5 +61,5 @@ nk_define_cross_normalized_symmetric_(jaccard, u1, powervsx, u1x8, u32, /*norm_v
|
|
|
61
61
|
#endif
|
|
62
62
|
|
|
63
63
|
#endif // NK_TARGET_POWERVSX
|
|
64
|
-
#endif //
|
|
64
|
+
#endif // NK_TARGET_POWER64_
|
|
65
65
|
#endif // NK_SETS_POWERVSX_H
|
|
@@ -35,7 +35,7 @@
|
|
|
35
35
|
#ifndef NK_SETS_SMEBI32_H
|
|
36
36
|
#define NK_SETS_SMEBI32_H
|
|
37
37
|
|
|
38
|
-
#if
|
|
38
|
+
#if NK_TARGET_ARM64_
|
|
39
39
|
#if NK_TARGET_SMEBI32
|
|
40
40
|
|
|
41
41
|
#include "numkong/types.h"
|
|
@@ -1125,6 +1125,6 @@ NK_PUBLIC void nk_jaccards_symmetric_u1_smebi32(nk_u1x8_t const *vectors, nk_siz
|
|
|
1125
1125
|
#endif
|
|
1126
1126
|
|
|
1127
1127
|
#endif // NK_TARGET_SMEBI32
|
|
1128
|
-
#endif //
|
|
1128
|
+
#endif // NK_TARGET_ARM64_
|
|
1129
1129
|
|
|
1130
1130
|
#endif // NK_SETS_SMEBI32_H
|
|
@@ -22,7 +22,7 @@
|
|
|
22
22
|
#ifndef NK_SPARSE_ICELAKE_H
|
|
23
23
|
#define NK_SPARSE_ICELAKE_H
|
|
24
24
|
|
|
25
|
-
#if
|
|
25
|
+
#if NK_TARGET_X8664_
|
|
26
26
|
#if NK_TARGET_ICELAKE
|
|
27
27
|
|
|
28
28
|
#include "numkong/types.h"
|
|
@@ -459,5 +459,5 @@ NK_PUBLIC void nk_sparse_dot_u32f32_icelake( //
|
|
|
459
459
|
#endif
|
|
460
460
|
|
|
461
461
|
#endif // NK_TARGET_ICELAKE
|
|
462
|
-
#endif //
|
|
462
|
+
#endif // NK_TARGET_X8664_
|
|
463
463
|
#endif // NK_SPARSE_ICELAKE_H
|
|
@@ -9,7 +9,7 @@
|
|
|
9
9
|
#ifndef NK_SPARSE_NEON_H
|
|
10
10
|
#define NK_SPARSE_NEON_H
|
|
11
11
|
|
|
12
|
-
#if
|
|
12
|
+
#if NK_TARGET_ARM64_
|
|
13
13
|
#if NK_TARGET_NEON
|
|
14
14
|
|
|
15
15
|
#include "numkong/types.h"
|
|
@@ -284,5 +284,5 @@ NK_PUBLIC void nk_sparse_intersect_u64_neon( //
|
|
|
284
284
|
#endif
|
|
285
285
|
|
|
286
286
|
#endif // NK_TARGET_NEON
|
|
287
|
-
#endif //
|
|
287
|
+
#endif // NK_TARGET_ARM64_
|
|
288
288
|
#endif // NK_SPARSE_NEON_H
|
|
@@ -9,7 +9,7 @@
|
|
|
9
9
|
#ifndef NK_SPARSE_SVE2_H
|
|
10
10
|
#define NK_SPARSE_SVE2_H
|
|
11
11
|
|
|
12
|
-
#if
|
|
12
|
+
#if NK_TARGET_ARM64_
|
|
13
13
|
|
|
14
14
|
#include "numkong/types.h"
|
|
15
15
|
|
|
@@ -499,5 +499,5 @@ NK_PUBLIC void nk_sparse_dot_u16bf16_sve2( //
|
|
|
499
499
|
} // extern "C"
|
|
500
500
|
#endif
|
|
501
501
|
|
|
502
|
-
#endif //
|
|
502
|
+
#endif // NK_TARGET_ARM64_
|
|
503
503
|
#endif // NK_SPARSE_SVE2_H
|