numkong 7.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +201 -0
- package/README.md +495 -0
- package/binding.gyp +540 -0
- package/c/dispatch.h +512 -0
- package/c/dispatch_bf16.c +389 -0
- package/c/dispatch_bf16c.c +52 -0
- package/c/dispatch_e2m3.c +263 -0
- package/c/dispatch_e3m2.c +243 -0
- package/c/dispatch_e4m3.c +276 -0
- package/c/dispatch_e5m2.c +272 -0
- package/c/dispatch_f16.c +376 -0
- package/c/dispatch_f16c.c +58 -0
- package/c/dispatch_f32.c +378 -0
- package/c/dispatch_f32c.c +99 -0
- package/c/dispatch_f64.c +296 -0
- package/c/dispatch_f64c.c +98 -0
- package/c/dispatch_i16.c +96 -0
- package/c/dispatch_i32.c +89 -0
- package/c/dispatch_i4.c +150 -0
- package/c/dispatch_i64.c +86 -0
- package/c/dispatch_i8.c +289 -0
- package/c/dispatch_other.c +330 -0
- package/c/dispatch_u1.c +148 -0
- package/c/dispatch_u16.c +124 -0
- package/c/dispatch_u32.c +118 -0
- package/c/dispatch_u4.c +150 -0
- package/c/dispatch_u64.c +102 -0
- package/c/dispatch_u8.c +303 -0
- package/c/numkong.c +950 -0
- package/include/README.md +573 -0
- package/include/module.modulemap +129 -0
- package/include/numkong/attention/sapphireamx.h +1361 -0
- package/include/numkong/attention/sme.h +2066 -0
- package/include/numkong/attention.h +49 -0
- package/include/numkong/capabilities.h +748 -0
- package/include/numkong/cast/README.md +262 -0
- package/include/numkong/cast/haswell.h +975 -0
- package/include/numkong/cast/icelake.h +470 -0
- package/include/numkong/cast/neon.h +1192 -0
- package/include/numkong/cast/rvv.h +1021 -0
- package/include/numkong/cast/sapphire.h +262 -0
- package/include/numkong/cast/serial.h +2262 -0
- package/include/numkong/cast/skylake.h +856 -0
- package/include/numkong/cast/v128relaxed.h +180 -0
- package/include/numkong/cast.h +230 -0
- package/include/numkong/curved/README.md +223 -0
- package/include/numkong/curved/genoa.h +182 -0
- package/include/numkong/curved/haswell.h +276 -0
- package/include/numkong/curved/neon.h +205 -0
- package/include/numkong/curved/neonbfdot.h +212 -0
- package/include/numkong/curved/neonhalf.h +212 -0
- package/include/numkong/curved/rvv.h +305 -0
- package/include/numkong/curved/serial.h +207 -0
- package/include/numkong/curved/skylake.h +457 -0
- package/include/numkong/curved/smef64.h +506 -0
- package/include/numkong/curved.h +517 -0
- package/include/numkong/curved.hpp +144 -0
- package/include/numkong/dot/README.md +425 -0
- package/include/numkong/dot/alder.h +563 -0
- package/include/numkong/dot/genoa.h +315 -0
- package/include/numkong/dot/haswell.h +1688 -0
- package/include/numkong/dot/icelake.h +883 -0
- package/include/numkong/dot/neon.h +818 -0
- package/include/numkong/dot/neonbfdot.h +244 -0
- package/include/numkong/dot/neonfhm.h +360 -0
- package/include/numkong/dot/neonhalf.h +198 -0
- package/include/numkong/dot/neonsdot.h +508 -0
- package/include/numkong/dot/rvv.h +714 -0
- package/include/numkong/dot/rvvbb.h +72 -0
- package/include/numkong/dot/rvvbf16.h +123 -0
- package/include/numkong/dot/rvvhalf.h +129 -0
- package/include/numkong/dot/sapphire.h +141 -0
- package/include/numkong/dot/serial.h +838 -0
- package/include/numkong/dot/sierra.h +405 -0
- package/include/numkong/dot/skylake.h +1084 -0
- package/include/numkong/dot/sve.h +379 -0
- package/include/numkong/dot/svebfdot.h +74 -0
- package/include/numkong/dot/svehalf.h +123 -0
- package/include/numkong/dot/v128relaxed.h +1258 -0
- package/include/numkong/dot.h +1070 -0
- package/include/numkong/dot.hpp +94 -0
- package/include/numkong/dots/README.md +496 -0
- package/include/numkong/dots/alder.h +114 -0
- package/include/numkong/dots/genoa.h +94 -0
- package/include/numkong/dots/haswell.h +295 -0
- package/include/numkong/dots/icelake.h +171 -0
- package/include/numkong/dots/neon.h +120 -0
- package/include/numkong/dots/neonbfdot.h +58 -0
- package/include/numkong/dots/neonfhm.h +94 -0
- package/include/numkong/dots/neonhalf.h +57 -0
- package/include/numkong/dots/neonsdot.h +108 -0
- package/include/numkong/dots/rvv.h +2486 -0
- package/include/numkong/dots/sapphireamx.h +3973 -0
- package/include/numkong/dots/serial.h +2844 -0
- package/include/numkong/dots/sierra.h +97 -0
- package/include/numkong/dots/skylake.h +196 -0
- package/include/numkong/dots/sme.h +5372 -0
- package/include/numkong/dots/smebi32.h +461 -0
- package/include/numkong/dots/smef64.h +1318 -0
- package/include/numkong/dots/smehalf.h +47 -0
- package/include/numkong/dots/v128relaxed.h +294 -0
- package/include/numkong/dots.h +2804 -0
- package/include/numkong/dots.hpp +639 -0
- package/include/numkong/each/README.md +469 -0
- package/include/numkong/each/haswell.h +1658 -0
- package/include/numkong/each/icelake.h +272 -0
- package/include/numkong/each/neon.h +1104 -0
- package/include/numkong/each/neonbfdot.h +212 -0
- package/include/numkong/each/neonhalf.h +410 -0
- package/include/numkong/each/rvv.h +1121 -0
- package/include/numkong/each/sapphire.h +477 -0
- package/include/numkong/each/serial.h +260 -0
- package/include/numkong/each/skylake.h +1562 -0
- package/include/numkong/each.h +2146 -0
- package/include/numkong/each.hpp +434 -0
- package/include/numkong/geospatial/README.md +147 -0
- package/include/numkong/geospatial/haswell.h +593 -0
- package/include/numkong/geospatial/neon.h +571 -0
- package/include/numkong/geospatial/rvv.h +701 -0
- package/include/numkong/geospatial/serial.h +309 -0
- package/include/numkong/geospatial/skylake.h +577 -0
- package/include/numkong/geospatial/v128relaxed.h +613 -0
- package/include/numkong/geospatial.h +453 -0
- package/include/numkong/geospatial.hpp +235 -0
- package/include/numkong/matrix.hpp +336 -0
- package/include/numkong/maxsim/README.md +187 -0
- package/include/numkong/maxsim/alder.h +511 -0
- package/include/numkong/maxsim/genoa.h +115 -0
- package/include/numkong/maxsim/haswell.h +553 -0
- package/include/numkong/maxsim/icelake.h +480 -0
- package/include/numkong/maxsim/neonsdot.h +394 -0
- package/include/numkong/maxsim/sapphireamx.h +877 -0
- package/include/numkong/maxsim/serial.h +490 -0
- package/include/numkong/maxsim/sme.h +929 -0
- package/include/numkong/maxsim/v128relaxed.h +280 -0
- package/include/numkong/maxsim.h +571 -0
- package/include/numkong/maxsim.hpp +133 -0
- package/include/numkong/mesh/README.md +227 -0
- package/include/numkong/mesh/haswell.h +2235 -0
- package/include/numkong/mesh/neon.h +1329 -0
- package/include/numkong/mesh/neonbfdot.h +842 -0
- package/include/numkong/mesh/neonhalf.h +616 -0
- package/include/numkong/mesh/rvv.h +916 -0
- package/include/numkong/mesh/serial.h +742 -0
- package/include/numkong/mesh/skylake.h +1135 -0
- package/include/numkong/mesh/v128relaxed.h +1052 -0
- package/include/numkong/mesh.h +652 -0
- package/include/numkong/mesh.hpp +762 -0
- package/include/numkong/numkong.h +78 -0
- package/include/numkong/numkong.hpp +57 -0
- package/include/numkong/probability/README.md +173 -0
- package/include/numkong/probability/haswell.h +267 -0
- package/include/numkong/probability/neon.h +225 -0
- package/include/numkong/probability/rvv.h +409 -0
- package/include/numkong/probability/serial.h +169 -0
- package/include/numkong/probability/skylake.h +324 -0
- package/include/numkong/probability.h +383 -0
- package/include/numkong/probability.hpp +120 -0
- package/include/numkong/random.h +50 -0
- package/include/numkong/random.hpp +285 -0
- package/include/numkong/reduce/README.md +547 -0
- package/include/numkong/reduce/alder.h +632 -0
- package/include/numkong/reduce/genoa.h +201 -0
- package/include/numkong/reduce/haswell.h +3783 -0
- package/include/numkong/reduce/icelake.h +549 -0
- package/include/numkong/reduce/neon.h +3841 -0
- package/include/numkong/reduce/neonbfdot.h +353 -0
- package/include/numkong/reduce/neonfhm.h +665 -0
- package/include/numkong/reduce/neonhalf.h +157 -0
- package/include/numkong/reduce/neonsdot.h +357 -0
- package/include/numkong/reduce/rvv.h +3407 -0
- package/include/numkong/reduce/serial.h +757 -0
- package/include/numkong/reduce/sierra.h +338 -0
- package/include/numkong/reduce/skylake.h +3792 -0
- package/include/numkong/reduce/v128relaxed.h +2302 -0
- package/include/numkong/reduce.h +1597 -0
- package/include/numkong/reduce.hpp +633 -0
- package/include/numkong/scalar/README.md +89 -0
- package/include/numkong/scalar/haswell.h +113 -0
- package/include/numkong/scalar/neon.h +122 -0
- package/include/numkong/scalar/neonhalf.h +70 -0
- package/include/numkong/scalar/rvv.h +211 -0
- package/include/numkong/scalar/sapphire.h +63 -0
- package/include/numkong/scalar/serial.h +332 -0
- package/include/numkong/scalar/v128relaxed.h +56 -0
- package/include/numkong/scalar.h +683 -0
- package/include/numkong/set/README.md +179 -0
- package/include/numkong/set/haswell.h +334 -0
- package/include/numkong/set/icelake.h +485 -0
- package/include/numkong/set/neon.h +364 -0
- package/include/numkong/set/rvv.h +226 -0
- package/include/numkong/set/rvvbb.h +117 -0
- package/include/numkong/set/serial.h +174 -0
- package/include/numkong/set/sve.h +185 -0
- package/include/numkong/set/v128relaxed.h +240 -0
- package/include/numkong/set.h +457 -0
- package/include/numkong/set.hpp +114 -0
- package/include/numkong/sets/README.md +149 -0
- package/include/numkong/sets/haswell.h +63 -0
- package/include/numkong/sets/icelake.h +66 -0
- package/include/numkong/sets/neon.h +61 -0
- package/include/numkong/sets/serial.h +43 -0
- package/include/numkong/sets/smebi32.h +1099 -0
- package/include/numkong/sets/v128relaxed.h +58 -0
- package/include/numkong/sets.h +339 -0
- package/include/numkong/sparse/README.md +156 -0
- package/include/numkong/sparse/icelake.h +463 -0
- package/include/numkong/sparse/neon.h +288 -0
- package/include/numkong/sparse/serial.h +117 -0
- package/include/numkong/sparse/sve2.h +507 -0
- package/include/numkong/sparse/turin.h +322 -0
- package/include/numkong/sparse.h +363 -0
- package/include/numkong/sparse.hpp +113 -0
- package/include/numkong/spatial/README.md +435 -0
- package/include/numkong/spatial/alder.h +607 -0
- package/include/numkong/spatial/genoa.h +290 -0
- package/include/numkong/spatial/haswell.h +960 -0
- package/include/numkong/spatial/icelake.h +586 -0
- package/include/numkong/spatial/neon.h +773 -0
- package/include/numkong/spatial/neonbfdot.h +165 -0
- package/include/numkong/spatial/neonhalf.h +118 -0
- package/include/numkong/spatial/neonsdot.h +261 -0
- package/include/numkong/spatial/rvv.h +984 -0
- package/include/numkong/spatial/rvvbf16.h +123 -0
- package/include/numkong/spatial/rvvhalf.h +117 -0
- package/include/numkong/spatial/sapphire.h +343 -0
- package/include/numkong/spatial/serial.h +346 -0
- package/include/numkong/spatial/sierra.h +323 -0
- package/include/numkong/spatial/skylake.h +606 -0
- package/include/numkong/spatial/sve.h +224 -0
- package/include/numkong/spatial/svebfdot.h +122 -0
- package/include/numkong/spatial/svehalf.h +109 -0
- package/include/numkong/spatial/v128relaxed.h +717 -0
- package/include/numkong/spatial.h +1425 -0
- package/include/numkong/spatial.hpp +183 -0
- package/include/numkong/spatials/README.md +580 -0
- package/include/numkong/spatials/alder.h +94 -0
- package/include/numkong/spatials/genoa.h +94 -0
- package/include/numkong/spatials/haswell.h +219 -0
- package/include/numkong/spatials/icelake.h +113 -0
- package/include/numkong/spatials/neon.h +109 -0
- package/include/numkong/spatials/neonbfdot.h +60 -0
- package/include/numkong/spatials/neonfhm.h +92 -0
- package/include/numkong/spatials/neonhalf.h +58 -0
- package/include/numkong/spatials/neonsdot.h +109 -0
- package/include/numkong/spatials/rvv.h +1960 -0
- package/include/numkong/spatials/sapphireamx.h +1149 -0
- package/include/numkong/spatials/serial.h +226 -0
- package/include/numkong/spatials/sierra.h +96 -0
- package/include/numkong/spatials/skylake.h +184 -0
- package/include/numkong/spatials/sme.h +1901 -0
- package/include/numkong/spatials/smef64.h +465 -0
- package/include/numkong/spatials/v128relaxed.h +240 -0
- package/include/numkong/spatials.h +3021 -0
- package/include/numkong/spatials.hpp +508 -0
- package/include/numkong/tensor.hpp +1592 -0
- package/include/numkong/trigonometry/README.md +184 -0
- package/include/numkong/trigonometry/haswell.h +652 -0
- package/include/numkong/trigonometry/neon.h +639 -0
- package/include/numkong/trigonometry/rvv.h +699 -0
- package/include/numkong/trigonometry/serial.h +703 -0
- package/include/numkong/trigonometry/skylake.h +721 -0
- package/include/numkong/trigonometry/v128relaxed.h +666 -0
- package/include/numkong/trigonometry.h +467 -0
- package/include/numkong/trigonometry.hpp +166 -0
- package/include/numkong/types.h +1384 -0
- package/include/numkong/types.hpp +5603 -0
- package/include/numkong/vector.hpp +698 -0
- package/javascript/README.md +246 -0
- package/javascript/dist/cjs/numkong-wasm.d.ts +166 -0
- package/javascript/dist/cjs/numkong-wasm.js +617 -0
- package/javascript/dist/cjs/numkong.d.ts +343 -0
- package/javascript/dist/cjs/numkong.js +523 -0
- package/javascript/dist/cjs/package.json +3 -0
- package/javascript/dist/cjs/types.d.ts +284 -0
- package/javascript/dist/cjs/types.js +653 -0
- package/javascript/dist/esm/numkong-wasm.d.ts +166 -0
- package/javascript/dist/esm/numkong-wasm.js +595 -0
- package/javascript/dist/esm/numkong.d.ts +343 -0
- package/javascript/dist/esm/numkong.js +452 -0
- package/javascript/dist/esm/package.json +3 -0
- package/javascript/dist/esm/types.d.ts +284 -0
- package/javascript/dist/esm/types.js +630 -0
- package/javascript/dist-package-cjs.json +3 -0
- package/javascript/dist-package-esm.json +3 -0
- package/javascript/node-gyp-build.d.ts +1 -0
- package/javascript/numkong-wasm.ts +756 -0
- package/javascript/numkong.c +689 -0
- package/javascript/numkong.ts +575 -0
- package/javascript/tsconfig-base.json +39 -0
- package/javascript/tsconfig-cjs.json +8 -0
- package/javascript/tsconfig-esm.json +8 -0
- package/javascript/types.ts +674 -0
- package/package.json +87 -0
package/c/dispatch.h
ADDED
|
@@ -0,0 +1,512 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @brief Common Definitions for Dispatch Files.
|
|
3
|
+
* @file c/dispatch.h
|
|
4
|
+
* @author Ash Vardanian
|
|
5
|
+
* @date February 3, 2026
|
|
6
|
+
*/
|
|
7
|
+
#ifndef NK_DISPATCH_H
|
|
8
|
+
#define NK_DISPATCH_H
|
|
9
|
+
|
|
10
|
+
#define NK_DYNAMIC_DISPATCH 1
|
|
11
|
+
#define NK_NATIVE_F16 0
|
|
12
|
+
#define NK_NATIVE_BF16 0
|
|
13
|
+
|
|
14
|
+
/* NK_TARGET_* defines are set by the build system:
|
|
15
|
+
* - Python: setup.py
|
|
16
|
+
* - Rust: build.rs
|
|
17
|
+
* - Node.js: binding.gyp
|
|
18
|
+
* - CMake: CMakeLists.txt
|
|
19
|
+
*
|
|
20
|
+
* For header-only usage without a build system, types.h provides
|
|
21
|
+
* compiler-intrinsic-based fallback detection.
|
|
22
|
+
*
|
|
23
|
+
* OS/compiler capabilities summary:
|
|
24
|
+
* - Linux: everything available in GCC 12+ and Clang 16+.
|
|
25
|
+
* - FreeBSD: same as Linux, except AMX (no kernel tile permission support).
|
|
26
|
+
* - Windows - MSVC: Haswell/Skylake/Icelake, plus Sapphire FP16 (MSVC 2022 17.2+).
|
|
27
|
+
* - macOS - Apple Clang: only Arm NEON and x86 AVX2 Haswell extensions.
|
|
28
|
+
*/
|
|
29
|
+
|
|
30
|
+
#include <numkong/numkong.h>
|
|
31
|
+
|
|
32
|
+
#ifdef __cplusplus
|
|
33
|
+
extern "C" {
|
|
34
|
+
#endif
|
|
35
|
+
|
|
36
|
+
// Definition of the dispatch table type (same structure as in numkong.c)
|
|
37
|
+
typedef struct {
|
|
38
|
+
// Dot products
|
|
39
|
+
nk_metric_dense_punned_t dot_f64c;
|
|
40
|
+
nk_metric_dense_punned_t dot_f32c;
|
|
41
|
+
nk_metric_dense_punned_t dot_bf16c;
|
|
42
|
+
nk_metric_dense_punned_t dot_f16c;
|
|
43
|
+
nk_metric_dense_punned_t dot_f64;
|
|
44
|
+
nk_metric_dense_punned_t dot_f32;
|
|
45
|
+
nk_metric_dense_punned_t dot_bf16;
|
|
46
|
+
nk_metric_dense_punned_t dot_f16;
|
|
47
|
+
nk_metric_dense_punned_t dot_e5m2;
|
|
48
|
+
nk_metric_dense_punned_t dot_e4m3;
|
|
49
|
+
nk_metric_dense_punned_t dot_e3m2;
|
|
50
|
+
nk_metric_dense_punned_t dot_e2m3;
|
|
51
|
+
nk_metric_dense_punned_t dot_i8;
|
|
52
|
+
nk_metric_dense_punned_t dot_u8;
|
|
53
|
+
nk_metric_dense_punned_t dot_i4;
|
|
54
|
+
nk_metric_dense_punned_t dot_u4;
|
|
55
|
+
nk_metric_dense_punned_t dot_u1;
|
|
56
|
+
nk_metric_dense_punned_t vdot_f64c;
|
|
57
|
+
nk_metric_dense_punned_t vdot_f32c;
|
|
58
|
+
nk_metric_dense_punned_t vdot_bf16c;
|
|
59
|
+
nk_metric_dense_punned_t vdot_f16c;
|
|
60
|
+
// Angular distances
|
|
61
|
+
nk_metric_dense_punned_t angular_f64;
|
|
62
|
+
nk_metric_dense_punned_t angular_f32;
|
|
63
|
+
nk_metric_dense_punned_t angular_bf16;
|
|
64
|
+
nk_metric_dense_punned_t angular_f16;
|
|
65
|
+
nk_metric_dense_punned_t angular_e5m2;
|
|
66
|
+
nk_metric_dense_punned_t angular_e4m3;
|
|
67
|
+
nk_metric_dense_punned_t angular_e3m2;
|
|
68
|
+
nk_metric_dense_punned_t angular_e2m3;
|
|
69
|
+
nk_metric_dense_punned_t angular_i8;
|
|
70
|
+
nk_metric_dense_punned_t angular_i4;
|
|
71
|
+
nk_metric_dense_punned_t angular_u8;
|
|
72
|
+
nk_metric_dense_punned_t angular_u4;
|
|
73
|
+
// Euclidean distances
|
|
74
|
+
nk_metric_dense_punned_t euclidean_f64;
|
|
75
|
+
nk_metric_dense_punned_t euclidean_f32;
|
|
76
|
+
nk_metric_dense_punned_t euclidean_bf16;
|
|
77
|
+
nk_metric_dense_punned_t euclidean_f16;
|
|
78
|
+
nk_metric_dense_punned_t euclidean_e5m2;
|
|
79
|
+
nk_metric_dense_punned_t euclidean_e4m3;
|
|
80
|
+
nk_metric_dense_punned_t euclidean_e3m2;
|
|
81
|
+
nk_metric_dense_punned_t euclidean_e2m3;
|
|
82
|
+
nk_metric_dense_punned_t euclidean_i8;
|
|
83
|
+
nk_metric_dense_punned_t euclidean_i4;
|
|
84
|
+
nk_metric_dense_punned_t euclidean_u8;
|
|
85
|
+
nk_metric_dense_punned_t euclidean_u4;
|
|
86
|
+
// Squared Euclidean distances
|
|
87
|
+
nk_metric_dense_punned_t sqeuclidean_f64;
|
|
88
|
+
nk_metric_dense_punned_t sqeuclidean_f32;
|
|
89
|
+
nk_metric_dense_punned_t sqeuclidean_bf16;
|
|
90
|
+
nk_metric_dense_punned_t sqeuclidean_f16;
|
|
91
|
+
nk_metric_dense_punned_t sqeuclidean_e5m2;
|
|
92
|
+
nk_metric_dense_punned_t sqeuclidean_e4m3;
|
|
93
|
+
nk_metric_dense_punned_t sqeuclidean_e3m2;
|
|
94
|
+
nk_metric_dense_punned_t sqeuclidean_e2m3;
|
|
95
|
+
nk_metric_dense_punned_t sqeuclidean_i8;
|
|
96
|
+
nk_metric_dense_punned_t sqeuclidean_i4;
|
|
97
|
+
nk_metric_dense_punned_t sqeuclidean_u8;
|
|
98
|
+
nk_metric_dense_punned_t sqeuclidean_u4;
|
|
99
|
+
// Binary distances
|
|
100
|
+
nk_metric_dense_punned_t hamming_u8;
|
|
101
|
+
nk_metric_dense_punned_t hamming_u1;
|
|
102
|
+
nk_metric_dense_punned_t jaccard_u32;
|
|
103
|
+
nk_metric_dense_punned_t jaccard_u16;
|
|
104
|
+
nk_metric_dense_punned_t jaccard_u1;
|
|
105
|
+
// Curved spaces
|
|
106
|
+
nk_metric_curved_punned_t bilinear_f64c;
|
|
107
|
+
nk_metric_curved_punned_t bilinear_f32c;
|
|
108
|
+
nk_metric_curved_punned_t bilinear_bf16c;
|
|
109
|
+
nk_metric_curved_punned_t bilinear_f16c;
|
|
110
|
+
nk_metric_curved_punned_t bilinear_f64;
|
|
111
|
+
nk_metric_curved_punned_t bilinear_f32;
|
|
112
|
+
nk_metric_curved_punned_t bilinear_bf16;
|
|
113
|
+
nk_metric_curved_punned_t bilinear_f16;
|
|
114
|
+
nk_metric_curved_punned_t mahalanobis_f64;
|
|
115
|
+
nk_metric_curved_punned_t mahalanobis_f32;
|
|
116
|
+
nk_metric_curved_punned_t mahalanobis_bf16;
|
|
117
|
+
nk_metric_curved_punned_t mahalanobis_f16;
|
|
118
|
+
// Geospatial distances
|
|
119
|
+
nk_metric_geospatial_punned_t haversine_f64;
|
|
120
|
+
nk_metric_geospatial_punned_t haversine_f32;
|
|
121
|
+
nk_metric_geospatial_punned_t vincenty_f64;
|
|
122
|
+
nk_metric_geospatial_punned_t vincenty_f32;
|
|
123
|
+
// Probability distributions
|
|
124
|
+
nk_metric_dense_punned_t kld_f64;
|
|
125
|
+
nk_metric_dense_punned_t kld_f32;
|
|
126
|
+
nk_metric_dense_punned_t kld_bf16;
|
|
127
|
+
nk_metric_dense_punned_t kld_f16;
|
|
128
|
+
nk_metric_dense_punned_t jsd_f64;
|
|
129
|
+
nk_metric_dense_punned_t jsd_f32;
|
|
130
|
+
nk_metric_dense_punned_t jsd_bf16;
|
|
131
|
+
nk_metric_dense_punned_t jsd_f16;
|
|
132
|
+
// Mesh alignment
|
|
133
|
+
nk_metric_mesh_punned_t rmsd_f64;
|
|
134
|
+
nk_metric_mesh_punned_t rmsd_f32;
|
|
135
|
+
nk_metric_mesh_punned_t rmsd_bf16;
|
|
136
|
+
nk_metric_mesh_punned_t rmsd_f16;
|
|
137
|
+
nk_metric_mesh_punned_t kabsch_f64;
|
|
138
|
+
nk_metric_mesh_punned_t kabsch_f32;
|
|
139
|
+
nk_metric_mesh_punned_t kabsch_bf16;
|
|
140
|
+
nk_metric_mesh_punned_t kabsch_f16;
|
|
141
|
+
nk_metric_mesh_punned_t umeyama_f64;
|
|
142
|
+
nk_metric_mesh_punned_t umeyama_f32;
|
|
143
|
+
nk_metric_mesh_punned_t umeyama_bf16;
|
|
144
|
+
nk_metric_mesh_punned_t umeyama_f16;
|
|
145
|
+
// Sparse intersections
|
|
146
|
+
nk_sparse_intersect_punned_t sparse_intersect_u64;
|
|
147
|
+
nk_sparse_intersect_punned_t sparse_intersect_u32;
|
|
148
|
+
nk_sparse_intersect_punned_t sparse_intersect_u16;
|
|
149
|
+
// Sparse dot products
|
|
150
|
+
nk_sparse_dot_punned_t sparse_dot_u32f32;
|
|
151
|
+
nk_sparse_dot_punned_t sparse_dot_u16bf16;
|
|
152
|
+
// Element-wise scale
|
|
153
|
+
nk_each_scale_punned_t each_scale_f64c;
|
|
154
|
+
nk_each_scale_punned_t each_scale_f32c;
|
|
155
|
+
nk_each_scale_punned_t each_scale_f64;
|
|
156
|
+
nk_each_scale_punned_t each_scale_f32;
|
|
157
|
+
nk_each_scale_punned_t each_scale_bf16;
|
|
158
|
+
nk_each_scale_punned_t each_scale_f16;
|
|
159
|
+
nk_each_scale_punned_t each_scale_e5m2;
|
|
160
|
+
nk_each_scale_punned_t each_scale_e4m3;
|
|
161
|
+
nk_each_scale_punned_t each_scale_e3m2;
|
|
162
|
+
nk_each_scale_punned_t each_scale_e2m3;
|
|
163
|
+
nk_each_scale_punned_t each_scale_i64;
|
|
164
|
+
nk_each_scale_punned_t each_scale_i32;
|
|
165
|
+
nk_each_scale_punned_t each_scale_i16;
|
|
166
|
+
nk_each_scale_punned_t each_scale_i8;
|
|
167
|
+
nk_each_scale_punned_t each_scale_u64;
|
|
168
|
+
nk_each_scale_punned_t each_scale_u32;
|
|
169
|
+
nk_each_scale_punned_t each_scale_u16;
|
|
170
|
+
nk_each_scale_punned_t each_scale_u8;
|
|
171
|
+
// Element-wise sum
|
|
172
|
+
nk_each_sum_punned_t each_sum_f64c;
|
|
173
|
+
nk_each_sum_punned_t each_sum_f32c;
|
|
174
|
+
nk_each_sum_punned_t each_sum_f64;
|
|
175
|
+
nk_each_sum_punned_t each_sum_f32;
|
|
176
|
+
nk_each_sum_punned_t each_sum_bf16;
|
|
177
|
+
nk_each_sum_punned_t each_sum_f16;
|
|
178
|
+
nk_each_sum_punned_t each_sum_e5m2;
|
|
179
|
+
nk_each_sum_punned_t each_sum_e4m3;
|
|
180
|
+
nk_each_sum_punned_t each_sum_e3m2;
|
|
181
|
+
nk_each_sum_punned_t each_sum_e2m3;
|
|
182
|
+
nk_each_sum_punned_t each_sum_i64;
|
|
183
|
+
nk_each_sum_punned_t each_sum_i32;
|
|
184
|
+
nk_each_sum_punned_t each_sum_i16;
|
|
185
|
+
nk_each_sum_punned_t each_sum_i8;
|
|
186
|
+
nk_each_sum_punned_t each_sum_u64;
|
|
187
|
+
nk_each_sum_punned_t each_sum_u32;
|
|
188
|
+
nk_each_sum_punned_t each_sum_u16;
|
|
189
|
+
nk_each_sum_punned_t each_sum_u8;
|
|
190
|
+
// Element-wise blend
|
|
191
|
+
nk_each_blend_punned_t each_blend_f64c;
|
|
192
|
+
nk_each_blend_punned_t each_blend_f32c;
|
|
193
|
+
nk_each_blend_punned_t each_blend_f64;
|
|
194
|
+
nk_each_blend_punned_t each_blend_f32;
|
|
195
|
+
nk_each_blend_punned_t each_blend_bf16;
|
|
196
|
+
nk_each_blend_punned_t each_blend_f16;
|
|
197
|
+
nk_each_blend_punned_t each_blend_e5m2;
|
|
198
|
+
nk_each_blend_punned_t each_blend_e4m3;
|
|
199
|
+
nk_each_blend_punned_t each_blend_e3m2;
|
|
200
|
+
nk_each_blend_punned_t each_blend_e2m3;
|
|
201
|
+
nk_each_blend_punned_t each_blend_i64;
|
|
202
|
+
nk_each_blend_punned_t each_blend_i32;
|
|
203
|
+
nk_each_blend_punned_t each_blend_i16;
|
|
204
|
+
nk_each_blend_punned_t each_blend_i8;
|
|
205
|
+
nk_each_blend_punned_t each_blend_u64;
|
|
206
|
+
nk_each_blend_punned_t each_blend_u32;
|
|
207
|
+
nk_each_blend_punned_t each_blend_u16;
|
|
208
|
+
nk_each_blend_punned_t each_blend_u8;
|
|
209
|
+
// Element-wise FMA
|
|
210
|
+
nk_each_fma_punned_t each_fma_f64c;
|
|
211
|
+
nk_each_fma_punned_t each_fma_f32c;
|
|
212
|
+
nk_each_fma_punned_t each_fma_f64;
|
|
213
|
+
nk_each_fma_punned_t each_fma_f32;
|
|
214
|
+
nk_each_fma_punned_t each_fma_bf16;
|
|
215
|
+
nk_each_fma_punned_t each_fma_f16;
|
|
216
|
+
nk_each_fma_punned_t each_fma_e5m2;
|
|
217
|
+
nk_each_fma_punned_t each_fma_e4m3;
|
|
218
|
+
nk_each_fma_punned_t each_fma_e3m2;
|
|
219
|
+
nk_each_fma_punned_t each_fma_e2m3;
|
|
220
|
+
nk_each_fma_punned_t each_fma_i64;
|
|
221
|
+
nk_each_fma_punned_t each_fma_i32;
|
|
222
|
+
nk_each_fma_punned_t each_fma_i16;
|
|
223
|
+
nk_each_fma_punned_t each_fma_i8;
|
|
224
|
+
nk_each_fma_punned_t each_fma_u64;
|
|
225
|
+
nk_each_fma_punned_t each_fma_u32;
|
|
226
|
+
nk_each_fma_punned_t each_fma_u16;
|
|
227
|
+
nk_each_fma_punned_t each_fma_u8;
|
|
228
|
+
// Trigonometry
|
|
229
|
+
nk_kernel_trigonometry_punned_t each_sin_f64;
|
|
230
|
+
nk_kernel_trigonometry_punned_t each_sin_f32;
|
|
231
|
+
nk_kernel_trigonometry_punned_t each_sin_f16;
|
|
232
|
+
nk_kernel_trigonometry_punned_t each_cos_f64;
|
|
233
|
+
nk_kernel_trigonometry_punned_t each_cos_f32;
|
|
234
|
+
nk_kernel_trigonometry_punned_t each_cos_f16;
|
|
235
|
+
nk_kernel_trigonometry_punned_t each_atan_f64;
|
|
236
|
+
nk_kernel_trigonometry_punned_t each_atan_f32;
|
|
237
|
+
nk_kernel_trigonometry_punned_t each_atan_f16;
|
|
238
|
+
// Reduce moments (sum + sum-of-squares)
|
|
239
|
+
nk_kernel_reduce_moments_punned_t reduce_moments_f64;
|
|
240
|
+
nk_kernel_reduce_moments_punned_t reduce_moments_f32;
|
|
241
|
+
nk_kernel_reduce_moments_punned_t reduce_moments_bf16;
|
|
242
|
+
nk_kernel_reduce_moments_punned_t reduce_moments_f16;
|
|
243
|
+
nk_kernel_reduce_moments_punned_t reduce_moments_e5m2;
|
|
244
|
+
nk_kernel_reduce_moments_punned_t reduce_moments_e4m3;
|
|
245
|
+
nk_kernel_reduce_moments_punned_t reduce_moments_e3m2;
|
|
246
|
+
nk_kernel_reduce_moments_punned_t reduce_moments_e2m3;
|
|
247
|
+
nk_kernel_reduce_moments_punned_t reduce_moments_i64;
|
|
248
|
+
nk_kernel_reduce_moments_punned_t reduce_moments_i32;
|
|
249
|
+
nk_kernel_reduce_moments_punned_t reduce_moments_i16;
|
|
250
|
+
nk_kernel_reduce_moments_punned_t reduce_moments_i8;
|
|
251
|
+
nk_kernel_reduce_moments_punned_t reduce_moments_i4;
|
|
252
|
+
nk_kernel_reduce_moments_punned_t reduce_moments_u64;
|
|
253
|
+
nk_kernel_reduce_moments_punned_t reduce_moments_u32;
|
|
254
|
+
nk_kernel_reduce_moments_punned_t reduce_moments_u16;
|
|
255
|
+
nk_kernel_reduce_moments_punned_t reduce_moments_u8;
|
|
256
|
+
nk_kernel_reduce_moments_punned_t reduce_moments_u4;
|
|
257
|
+
nk_kernel_reduce_moments_punned_t reduce_moments_u1;
|
|
258
|
+
// Reduce minmax (min + argmin + max + argmax)
|
|
259
|
+
nk_kernel_reduce_minmax_punned_t reduce_minmax_f64;
|
|
260
|
+
nk_kernel_reduce_minmax_punned_t reduce_minmax_f32;
|
|
261
|
+
nk_kernel_reduce_minmax_punned_t reduce_minmax_bf16;
|
|
262
|
+
nk_kernel_reduce_minmax_punned_t reduce_minmax_f16;
|
|
263
|
+
nk_kernel_reduce_minmax_punned_t reduce_minmax_e5m2;
|
|
264
|
+
nk_kernel_reduce_minmax_punned_t reduce_minmax_e4m3;
|
|
265
|
+
nk_kernel_reduce_minmax_punned_t reduce_minmax_e3m2;
|
|
266
|
+
nk_kernel_reduce_minmax_punned_t reduce_minmax_e2m3;
|
|
267
|
+
nk_kernel_reduce_minmax_punned_t reduce_minmax_i64;
|
|
268
|
+
nk_kernel_reduce_minmax_punned_t reduce_minmax_i32;
|
|
269
|
+
nk_kernel_reduce_minmax_punned_t reduce_minmax_i16;
|
|
270
|
+
nk_kernel_reduce_minmax_punned_t reduce_minmax_i8;
|
|
271
|
+
nk_kernel_reduce_minmax_punned_t reduce_minmax_i4;
|
|
272
|
+
nk_kernel_reduce_minmax_punned_t reduce_minmax_u64;
|
|
273
|
+
nk_kernel_reduce_minmax_punned_t reduce_minmax_u32;
|
|
274
|
+
nk_kernel_reduce_minmax_punned_t reduce_minmax_u16;
|
|
275
|
+
nk_kernel_reduce_minmax_punned_t reduce_minmax_u8;
|
|
276
|
+
nk_kernel_reduce_minmax_punned_t reduce_minmax_u4;
|
|
277
|
+
nk_kernel_reduce_minmax_punned_t reduce_minmax_u1;
|
|
278
|
+
// Dots packed size
|
|
279
|
+
nk_dots_packed_size_punned_t dots_packed_size_f64;
|
|
280
|
+
nk_dots_packed_size_punned_t dots_packed_size_f32;
|
|
281
|
+
nk_dots_packed_size_punned_t dots_packed_size_bf16;
|
|
282
|
+
nk_dots_packed_size_punned_t dots_packed_size_f16;
|
|
283
|
+
nk_dots_packed_size_punned_t dots_packed_size_e5m2;
|
|
284
|
+
nk_dots_packed_size_punned_t dots_packed_size_e4m3;
|
|
285
|
+
nk_dots_packed_size_punned_t dots_packed_size_e3m2;
|
|
286
|
+
nk_dots_packed_size_punned_t dots_packed_size_e2m3;
|
|
287
|
+
nk_dots_packed_size_punned_t dots_packed_size_i8;
|
|
288
|
+
nk_dots_packed_size_punned_t dots_packed_size_i4;
|
|
289
|
+
nk_dots_packed_size_punned_t dots_packed_size_u8;
|
|
290
|
+
nk_dots_packed_size_punned_t dots_packed_size_u4;
|
|
291
|
+
nk_dots_packed_size_punned_t dots_packed_size_u1;
|
|
292
|
+
// Dots pack
|
|
293
|
+
nk_dots_pack_punned_t dots_pack_f64;
|
|
294
|
+
nk_dots_pack_punned_t dots_pack_f32;
|
|
295
|
+
nk_dots_pack_punned_t dots_pack_bf16;
|
|
296
|
+
nk_dots_pack_punned_t dots_pack_f16;
|
|
297
|
+
nk_dots_pack_punned_t dots_pack_e5m2;
|
|
298
|
+
nk_dots_pack_punned_t dots_pack_e4m3;
|
|
299
|
+
nk_dots_pack_punned_t dots_pack_e3m2;
|
|
300
|
+
nk_dots_pack_punned_t dots_pack_e2m3;
|
|
301
|
+
nk_dots_pack_punned_t dots_pack_i8;
|
|
302
|
+
nk_dots_pack_punned_t dots_pack_i4;
|
|
303
|
+
nk_dots_pack_punned_t dots_pack_u8;
|
|
304
|
+
nk_dots_pack_punned_t dots_pack_u4;
|
|
305
|
+
nk_dots_pack_punned_t dots_pack_u1;
|
|
306
|
+
// Dots packed
|
|
307
|
+
nk_dots_packed_punned_t dots_packed_f64;
|
|
308
|
+
nk_dots_packed_punned_t dots_packed_f32;
|
|
309
|
+
nk_dots_packed_punned_t dots_packed_bf16;
|
|
310
|
+
nk_dots_packed_punned_t dots_packed_f16;
|
|
311
|
+
nk_dots_packed_punned_t dots_packed_e5m2;
|
|
312
|
+
nk_dots_packed_punned_t dots_packed_e4m3;
|
|
313
|
+
nk_dots_packed_punned_t dots_packed_e3m2;
|
|
314
|
+
nk_dots_packed_punned_t dots_packed_e2m3;
|
|
315
|
+
nk_dots_packed_punned_t dots_packed_i8;
|
|
316
|
+
nk_dots_packed_punned_t dots_packed_i4;
|
|
317
|
+
nk_dots_packed_punned_t dots_packed_u8;
|
|
318
|
+
nk_dots_packed_punned_t dots_packed_u4;
|
|
319
|
+
nk_dots_packed_punned_t dots_packed_u1;
|
|
320
|
+
// Sets packed
|
|
321
|
+
nk_hammings_packed_punned_t hammings_packed_u1;
|
|
322
|
+
nk_jaccards_packed_punned_t jaccards_packed_u1;
|
|
323
|
+
// Dots symmetric
|
|
324
|
+
nk_dots_symmetric_punned_t dots_symmetric_f64;
|
|
325
|
+
nk_dots_symmetric_punned_t dots_symmetric_f32;
|
|
326
|
+
nk_dots_symmetric_punned_t dots_symmetric_bf16;
|
|
327
|
+
nk_dots_symmetric_punned_t dots_symmetric_f16;
|
|
328
|
+
nk_dots_symmetric_punned_t dots_symmetric_e5m2;
|
|
329
|
+
nk_dots_symmetric_punned_t dots_symmetric_e4m3;
|
|
330
|
+
nk_dots_symmetric_punned_t dots_symmetric_e3m2;
|
|
331
|
+
nk_dots_symmetric_punned_t dots_symmetric_e2m3;
|
|
332
|
+
nk_dots_symmetric_punned_t dots_symmetric_i8;
|
|
333
|
+
nk_dots_symmetric_punned_t dots_symmetric_i4;
|
|
334
|
+
nk_dots_symmetric_punned_t dots_symmetric_u8;
|
|
335
|
+
nk_dots_symmetric_punned_t dots_symmetric_u4;
|
|
336
|
+
nk_dots_symmetric_punned_t dots_symmetric_u1;
|
|
337
|
+
// Sets symmetric
|
|
338
|
+
nk_hammings_symmetric_punned_t hammings_symmetric_u1;
|
|
339
|
+
nk_jaccards_symmetric_punned_t jaccards_symmetric_u1;
|
|
340
|
+
// Angulars packed
|
|
341
|
+
nk_angulars_packed_punned_t angulars_packed_f64;
|
|
342
|
+
nk_angulars_packed_punned_t angulars_packed_f32;
|
|
343
|
+
nk_angulars_packed_punned_t angulars_packed_bf16;
|
|
344
|
+
nk_angulars_packed_punned_t angulars_packed_f16;
|
|
345
|
+
nk_angulars_packed_punned_t angulars_packed_e5m2;
|
|
346
|
+
nk_angulars_packed_punned_t angulars_packed_e4m3;
|
|
347
|
+
nk_angulars_packed_punned_t angulars_packed_e3m2;
|
|
348
|
+
nk_angulars_packed_punned_t angulars_packed_e2m3;
|
|
349
|
+
nk_angulars_packed_punned_t angulars_packed_i8;
|
|
350
|
+
nk_angulars_packed_punned_t angulars_packed_i4;
|
|
351
|
+
nk_angulars_packed_punned_t angulars_packed_u8;
|
|
352
|
+
nk_angulars_packed_punned_t angulars_packed_u4;
|
|
353
|
+
// Angulars symmetric
|
|
354
|
+
nk_angulars_symmetric_punned_t angulars_symmetric_f64;
|
|
355
|
+
nk_angulars_symmetric_punned_t angulars_symmetric_f32;
|
|
356
|
+
nk_angulars_symmetric_punned_t angulars_symmetric_bf16;
|
|
357
|
+
nk_angulars_symmetric_punned_t angulars_symmetric_f16;
|
|
358
|
+
nk_angulars_symmetric_punned_t angulars_symmetric_e5m2;
|
|
359
|
+
nk_angulars_symmetric_punned_t angulars_symmetric_e4m3;
|
|
360
|
+
nk_angulars_symmetric_punned_t angulars_symmetric_e3m2;
|
|
361
|
+
nk_angulars_symmetric_punned_t angulars_symmetric_e2m3;
|
|
362
|
+
nk_angulars_symmetric_punned_t angulars_symmetric_i8;
|
|
363
|
+
nk_angulars_symmetric_punned_t angulars_symmetric_i4;
|
|
364
|
+
nk_angulars_symmetric_punned_t angulars_symmetric_u8;
|
|
365
|
+
nk_angulars_symmetric_punned_t angulars_symmetric_u4;
|
|
366
|
+
// Euclideans packed
|
|
367
|
+
nk_euclideans_packed_punned_t euclideans_packed_f64;
|
|
368
|
+
nk_euclideans_packed_punned_t euclideans_packed_f32;
|
|
369
|
+
nk_euclideans_packed_punned_t euclideans_packed_bf16;
|
|
370
|
+
nk_euclideans_packed_punned_t euclideans_packed_f16;
|
|
371
|
+
nk_euclideans_packed_punned_t euclideans_packed_e5m2;
|
|
372
|
+
nk_euclideans_packed_punned_t euclideans_packed_e4m3;
|
|
373
|
+
nk_euclideans_packed_punned_t euclideans_packed_e3m2;
|
|
374
|
+
nk_euclideans_packed_punned_t euclideans_packed_e2m3;
|
|
375
|
+
nk_euclideans_packed_punned_t euclideans_packed_i8;
|
|
376
|
+
nk_euclideans_packed_punned_t euclideans_packed_i4;
|
|
377
|
+
nk_euclideans_packed_punned_t euclideans_packed_u8;
|
|
378
|
+
nk_euclideans_packed_punned_t euclideans_packed_u4;
|
|
379
|
+
// Euclideans symmetric
|
|
380
|
+
nk_euclideans_symmetric_punned_t euclideans_symmetric_f64;
|
|
381
|
+
nk_euclideans_symmetric_punned_t euclideans_symmetric_f32;
|
|
382
|
+
nk_euclideans_symmetric_punned_t euclideans_symmetric_bf16;
|
|
383
|
+
nk_euclideans_symmetric_punned_t euclideans_symmetric_f16;
|
|
384
|
+
nk_euclideans_symmetric_punned_t euclideans_symmetric_e5m2;
|
|
385
|
+
nk_euclideans_symmetric_punned_t euclideans_symmetric_e4m3;
|
|
386
|
+
nk_euclideans_symmetric_punned_t euclideans_symmetric_e3m2;
|
|
387
|
+
nk_euclideans_symmetric_punned_t euclideans_symmetric_e2m3;
|
|
388
|
+
nk_euclideans_symmetric_punned_t euclideans_symmetric_i8;
|
|
389
|
+
nk_euclideans_symmetric_punned_t euclideans_symmetric_i4;
|
|
390
|
+
nk_euclideans_symmetric_punned_t euclideans_symmetric_u8;
|
|
391
|
+
nk_euclideans_symmetric_punned_t euclideans_symmetric_u4;
|
|
392
|
+
// MaxSim packed size
|
|
393
|
+
nk_dots_packed_size_punned_t maxsim_packed_size_f32;
|
|
394
|
+
nk_dots_packed_size_punned_t maxsim_packed_size_bf16;
|
|
395
|
+
nk_dots_packed_size_punned_t maxsim_packed_size_f16;
|
|
396
|
+
// MaxSim pack
|
|
397
|
+
nk_dots_pack_punned_t maxsim_pack_f32;
|
|
398
|
+
nk_dots_pack_punned_t maxsim_pack_bf16;
|
|
399
|
+
nk_dots_pack_punned_t maxsim_pack_f16;
|
|
400
|
+
// MaxSim packed
|
|
401
|
+
nk_maxsim_packed_punned_t maxsim_packed_f32;
|
|
402
|
+
nk_maxsim_packed_punned_t maxsim_packed_bf16;
|
|
403
|
+
nk_maxsim_packed_punned_t maxsim_packed_f16;
|
|
404
|
+
// Type casting
|
|
405
|
+
nk_kernel_cast_punned_t cast;
|
|
406
|
+
// Scalar conversions
|
|
407
|
+
void (*bf16_to_f32)(nk_bf16_t const *, nk_f32_t *);
|
|
408
|
+
void (*f32_to_bf16)(nk_f32_t const *, nk_bf16_t *);
|
|
409
|
+
void (*f16_to_f32)(nk_f16_t const *, nk_f32_t *);
|
|
410
|
+
void (*f32_to_f16)(nk_f32_t const *, nk_f16_t *);
|
|
411
|
+
void (*e5m2_to_f32)(nk_e5m2_t const *, nk_f32_t *);
|
|
412
|
+
void (*f32_to_e5m2)(nk_f32_t const *, nk_e5m2_t *);
|
|
413
|
+
void (*e4m3_to_f32)(nk_e4m3_t const *, nk_f32_t *);
|
|
414
|
+
void (*f32_to_e4m3)(nk_f32_t const *, nk_e4m3_t *);
|
|
415
|
+
void (*e3m2_to_f32)(nk_e3m2_t const *, nk_f32_t *);
|
|
416
|
+
void (*f32_to_e3m2)(nk_f32_t const *, nk_e3m2_t *);
|
|
417
|
+
void (*e2m3_to_f32)(nk_e2m3_t const *, nk_f32_t *);
|
|
418
|
+
void (*f32_to_e2m3)(nk_f32_t const *, nk_e2m3_t *);
|
|
419
|
+
// Scalar math
|
|
420
|
+
nk_f64_t (*f64_sqrt)(nk_f64_t);
|
|
421
|
+
nk_f64_t (*f64_rsqrt)(nk_f64_t);
|
|
422
|
+
nk_f64_t (*f64_fma)(nk_f64_t, nk_f64_t, nk_f64_t);
|
|
423
|
+
nk_f32_t (*f32_sqrt)(nk_f32_t);
|
|
424
|
+
nk_f32_t (*f32_rsqrt)(nk_f32_t);
|
|
425
|
+
nk_f32_t (*f32_fma)(nk_f32_t, nk_f32_t, nk_f32_t);
|
|
426
|
+
nk_f16_t (*f16_sqrt)(nk_f16_t);
|
|
427
|
+
nk_f16_t (*f16_rsqrt)(nk_f16_t);
|
|
428
|
+
nk_f16_t (*f16_fma)(nk_f16_t, nk_f16_t, nk_f16_t);
|
|
429
|
+
// Scalar saturating arithmetic
|
|
430
|
+
nk_i64_t (*i64_saturating_add)(nk_i64_t, nk_i64_t);
|
|
431
|
+
nk_i64_t (*i64_saturating_mul)(nk_i64_t, nk_i64_t);
|
|
432
|
+
nk_i32_t (*i32_saturating_add)(nk_i32_t, nk_i32_t);
|
|
433
|
+
nk_i32_t (*i32_saturating_mul)(nk_i32_t, nk_i32_t);
|
|
434
|
+
nk_i16_t (*i16_saturating_add)(nk_i16_t, nk_i16_t);
|
|
435
|
+
nk_i16_t (*i16_saturating_mul)(nk_i16_t, nk_i16_t);
|
|
436
|
+
nk_i8_t (*i8_saturating_add)(nk_i8_t, nk_i8_t);
|
|
437
|
+
nk_i8_t (*i8_saturating_mul)(nk_i8_t, nk_i8_t);
|
|
438
|
+
nk_i4x2_t (*i4x2_saturating_add)(nk_i4x2_t, nk_i4x2_t);
|
|
439
|
+
nk_i4x2_t (*i4x2_saturating_mul)(nk_i4x2_t, nk_i4x2_t);
|
|
440
|
+
nk_u64_t (*u64_saturating_add)(nk_u64_t, nk_u64_t);
|
|
441
|
+
nk_u64_t (*u64_saturating_mul)(nk_u64_t, nk_u64_t);
|
|
442
|
+
nk_u32_t (*u32_saturating_add)(nk_u32_t, nk_u32_t);
|
|
443
|
+
nk_u32_t (*u32_saturating_mul)(nk_u32_t, nk_u32_t);
|
|
444
|
+
nk_u16_t (*u16_saturating_add)(nk_u16_t, nk_u16_t);
|
|
445
|
+
nk_u16_t (*u16_saturating_mul)(nk_u16_t, nk_u16_t);
|
|
446
|
+
nk_u8_t (*u8_saturating_add)(nk_u8_t, nk_u8_t);
|
|
447
|
+
nk_u8_t (*u8_saturating_mul)(nk_u8_t, nk_u8_t);
|
|
448
|
+
nk_u4x2_t (*u4x2_saturating_add)(nk_u4x2_t, nk_u4x2_t);
|
|
449
|
+
nk_u4x2_t (*u4x2_saturating_mul)(nk_u4x2_t, nk_u4x2_t);
|
|
450
|
+
// Scalar ordering
|
|
451
|
+
int (*bf16_order)(nk_bf16_t, nk_bf16_t);
|
|
452
|
+
int (*f16_order)(nk_f16_t, nk_f16_t);
|
|
453
|
+
int (*e5m2_order)(nk_e5m2_t, nk_e5m2_t);
|
|
454
|
+
int (*e4m3_order)(nk_e4m3_t, nk_e4m3_t);
|
|
455
|
+
int (*e3m2_order)(nk_e3m2_t, nk_e3m2_t);
|
|
456
|
+
int (*e2m3_order)(nk_e2m3_t, nk_e2m3_t);
|
|
457
|
+
} nk_implementations_t;
|
|
458
|
+
|
|
459
|
+
// Global dispatch table - defined in numkong.c.
// Declared `extern` here, so this header only names the `nk_implementations_t` object;
// it does not define or initialize it.
|
|
460
|
+
extern nk_implementations_t nk_dispatch_table;
|
|
461
|
+
|
|
462
|
+
// Error handlers - defined in numkong.c.
// Parameters are unnamed; apart from a few `nk_size_t *` arguments, every pointer is
// `void const *` or `void *`. All return `void` except `nk_error_packed_size_`, which
// returns `nk_size_t`.
// NOTE(review): the names suggest one handler per kernel family, presumably used as the
// fallback when no real kernel is available - confirm against numkong.c.
|
|
463
|
+
extern void nk_error_dense_(void const *, void const *, nk_size_t, void *);
|
|
464
|
+
extern void nk_error_sparse_intersect_(void const *, void const *, nk_size_t, nk_size_t, void *, nk_size_t *);
|
|
465
|
+
extern void nk_error_sparse_dot_(void const *, void const *, void const *, void const *, nk_size_t, nk_size_t, void *);
|
|
466
|
+
extern void nk_error_curved_(void const *, void const *, void const *, nk_size_t, void *);
|
|
467
|
+
extern void nk_error_geospatial_(void const *, void const *, void const *, void const *, nk_size_t, void *);
|
|
468
|
+
extern void nk_error_each_fma_(void const *, void const *, void const *, nk_size_t, void const *, void const *, void *);
|
|
469
|
+
extern void nk_error_each_blend_(void const *, void const *, nk_size_t, void const *, void const *, void *);
|
|
470
|
+
extern void nk_error_each_scale_(void const *, nk_size_t, void const *, void const *, void *);
|
|
471
|
+
extern void nk_error_each_sum_(void const *, void const *, nk_size_t, void *);
|
|
472
|
+
extern void nk_error_trigonometry_(void const *, nk_size_t, void *);
|
|
473
|
+
extern void nk_error_mesh_(void const *, void const *, nk_size_t, void *, void *, void *, void *, void *);
|
|
474
|
+
extern void nk_error_reduce_moments_(void const *, nk_size_t, nk_size_t, void *, void *);
|
|
475
|
+
extern void nk_error_reduce_minmax_(void const *, nk_size_t, nk_size_t, void *, nk_size_t *, void *, nk_size_t *);
|
|
476
|
+
extern nk_size_t nk_error_packed_size_(nk_size_t, nk_size_t);
|
|
477
|
+
extern void nk_error_pack_(void const *, nk_size_t, nk_size_t, nk_size_t, void *);
|
|
478
|
+
extern void nk_error_dots_(void const *, void const *, void *, nk_size_t, nk_size_t, nk_size_t, nk_size_t, nk_size_t);
|
|
479
|
+
extern void nk_error_dots_symmetric_(void const *, nk_size_t, nk_size_t, nk_size_t, void *, nk_size_t, nk_size_t,
|
|
480
|
+
nk_size_t);
|
|
481
|
+
|
|
482
|
+
// Dtype-specific kernel lookup functions.
// One declaration per element-type suffix (f64c..f16c, f64..e2m3, i64..i4, u64..u1) plus
// one for `cast`. All share the same signature:
// (nk_capability_t, nk_kernel_kind_t, nk_kernel_punned_t *, nk_capability_t *) -> void.
// NOTE(review): the two trailing pointers are non-const, so they look like outputs (the
// selected kernel and the capability providing it) - confirm against the definitions,
// presumably in the per-dtype c/dispatch_*.c sources.
|
|
483
|
+
extern void nk_dispatch_f64c_find_(nk_capability_t, nk_kernel_kind_t, nk_kernel_punned_t *, nk_capability_t *);
|
|
484
|
+
extern void nk_dispatch_f32c_find_(nk_capability_t, nk_kernel_kind_t, nk_kernel_punned_t *, nk_capability_t *);
|
|
485
|
+
extern void nk_dispatch_bf16c_find_(nk_capability_t, nk_kernel_kind_t, nk_kernel_punned_t *, nk_capability_t *);
|
|
486
|
+
extern void nk_dispatch_f16c_find_(nk_capability_t, nk_kernel_kind_t, nk_kernel_punned_t *, nk_capability_t *);
|
|
487
|
+
extern void nk_dispatch_f64_find_(nk_capability_t, nk_kernel_kind_t, nk_kernel_punned_t *, nk_capability_t *);
|
|
488
|
+
extern void nk_dispatch_f32_find_(nk_capability_t, nk_kernel_kind_t, nk_kernel_punned_t *, nk_capability_t *);
|
|
489
|
+
extern void nk_dispatch_bf16_find_(nk_capability_t, nk_kernel_kind_t, nk_kernel_punned_t *, nk_capability_t *);
|
|
490
|
+
extern void nk_dispatch_f16_find_(nk_capability_t, nk_kernel_kind_t, nk_kernel_punned_t *, nk_capability_t *);
|
|
491
|
+
extern void nk_dispatch_e5m2_find_(nk_capability_t, nk_kernel_kind_t, nk_kernel_punned_t *, nk_capability_t *);
|
|
492
|
+
extern void nk_dispatch_e4m3_find_(nk_capability_t, nk_kernel_kind_t, nk_kernel_punned_t *, nk_capability_t *);
|
|
493
|
+
extern void nk_dispatch_e3m2_find_(nk_capability_t, nk_kernel_kind_t, nk_kernel_punned_t *, nk_capability_t *);
|
|
494
|
+
extern void nk_dispatch_e2m3_find_(nk_capability_t, nk_kernel_kind_t, nk_kernel_punned_t *, nk_capability_t *);
|
|
495
|
+
extern void nk_dispatch_i64_find_(nk_capability_t, nk_kernel_kind_t, nk_kernel_punned_t *, nk_capability_t *);
|
|
496
|
+
extern void nk_dispatch_i32_find_(nk_capability_t, nk_kernel_kind_t, nk_kernel_punned_t *, nk_capability_t *);
|
|
497
|
+
extern void nk_dispatch_i16_find_(nk_capability_t, nk_kernel_kind_t, nk_kernel_punned_t *, nk_capability_t *);
|
|
498
|
+
extern void nk_dispatch_i8_find_(nk_capability_t, nk_kernel_kind_t, nk_kernel_punned_t *, nk_capability_t *);
|
|
499
|
+
extern void nk_dispatch_i4_find_(nk_capability_t, nk_kernel_kind_t, nk_kernel_punned_t *, nk_capability_t *);
|
|
500
|
+
extern void nk_dispatch_u64_find_(nk_capability_t, nk_kernel_kind_t, nk_kernel_punned_t *, nk_capability_t *);
|
|
501
|
+
extern void nk_dispatch_u32_find_(nk_capability_t, nk_kernel_kind_t, nk_kernel_punned_t *, nk_capability_t *);
|
|
502
|
+
extern void nk_dispatch_u16_find_(nk_capability_t, nk_kernel_kind_t, nk_kernel_punned_t *, nk_capability_t *);
|
|
503
|
+
extern void nk_dispatch_u8_find_(nk_capability_t, nk_kernel_kind_t, nk_kernel_punned_t *, nk_capability_t *);
|
|
504
|
+
extern void nk_dispatch_u4_find_(nk_capability_t, nk_kernel_kind_t, nk_kernel_punned_t *, nk_capability_t *);
|
|
505
|
+
extern void nk_dispatch_u1_find_(nk_capability_t, nk_kernel_kind_t, nk_kernel_punned_t *, nk_capability_t *);
|
|
506
|
+
extern void nk_dispatch_cast_find_(nk_capability_t, nk_kernel_kind_t, nk_kernel_punned_t *, nk_capability_t *);
|
|
507
|
+
|
|
508
|
+
#ifdef __cplusplus
|
|
509
|
+
}
|
|
510
|
+
#endif
|
|
511
|
+
|
|
512
|
+
#endif // NK_DISPATCH_H
|