numkong 7.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (294) hide show
  1. package/LICENSE +201 -0
  2. package/README.md +495 -0
  3. package/binding.gyp +540 -0
  4. package/c/dispatch.h +512 -0
  5. package/c/dispatch_bf16.c +389 -0
  6. package/c/dispatch_bf16c.c +52 -0
  7. package/c/dispatch_e2m3.c +263 -0
  8. package/c/dispatch_e3m2.c +243 -0
  9. package/c/dispatch_e4m3.c +276 -0
  10. package/c/dispatch_e5m2.c +272 -0
  11. package/c/dispatch_f16.c +376 -0
  12. package/c/dispatch_f16c.c +58 -0
  13. package/c/dispatch_f32.c +378 -0
  14. package/c/dispatch_f32c.c +99 -0
  15. package/c/dispatch_f64.c +296 -0
  16. package/c/dispatch_f64c.c +98 -0
  17. package/c/dispatch_i16.c +96 -0
  18. package/c/dispatch_i32.c +89 -0
  19. package/c/dispatch_i4.c +150 -0
  20. package/c/dispatch_i64.c +86 -0
  21. package/c/dispatch_i8.c +289 -0
  22. package/c/dispatch_other.c +330 -0
  23. package/c/dispatch_u1.c +148 -0
  24. package/c/dispatch_u16.c +124 -0
  25. package/c/dispatch_u32.c +118 -0
  26. package/c/dispatch_u4.c +150 -0
  27. package/c/dispatch_u64.c +102 -0
  28. package/c/dispatch_u8.c +303 -0
  29. package/c/numkong.c +950 -0
  30. package/include/README.md +573 -0
  31. package/include/module.modulemap +129 -0
  32. package/include/numkong/attention/sapphireamx.h +1361 -0
  33. package/include/numkong/attention/sme.h +2066 -0
  34. package/include/numkong/attention.h +49 -0
  35. package/include/numkong/capabilities.h +748 -0
  36. package/include/numkong/cast/README.md +262 -0
  37. package/include/numkong/cast/haswell.h +975 -0
  38. package/include/numkong/cast/icelake.h +470 -0
  39. package/include/numkong/cast/neon.h +1192 -0
  40. package/include/numkong/cast/rvv.h +1021 -0
  41. package/include/numkong/cast/sapphire.h +262 -0
  42. package/include/numkong/cast/serial.h +2262 -0
  43. package/include/numkong/cast/skylake.h +856 -0
  44. package/include/numkong/cast/v128relaxed.h +180 -0
  45. package/include/numkong/cast.h +230 -0
  46. package/include/numkong/curved/README.md +223 -0
  47. package/include/numkong/curved/genoa.h +182 -0
  48. package/include/numkong/curved/haswell.h +276 -0
  49. package/include/numkong/curved/neon.h +205 -0
  50. package/include/numkong/curved/neonbfdot.h +212 -0
  51. package/include/numkong/curved/neonhalf.h +212 -0
  52. package/include/numkong/curved/rvv.h +305 -0
  53. package/include/numkong/curved/serial.h +207 -0
  54. package/include/numkong/curved/skylake.h +457 -0
  55. package/include/numkong/curved/smef64.h +506 -0
  56. package/include/numkong/curved.h +517 -0
  57. package/include/numkong/curved.hpp +144 -0
  58. package/include/numkong/dot/README.md +425 -0
  59. package/include/numkong/dot/alder.h +563 -0
  60. package/include/numkong/dot/genoa.h +315 -0
  61. package/include/numkong/dot/haswell.h +1688 -0
  62. package/include/numkong/dot/icelake.h +883 -0
  63. package/include/numkong/dot/neon.h +818 -0
  64. package/include/numkong/dot/neonbfdot.h +244 -0
  65. package/include/numkong/dot/neonfhm.h +360 -0
  66. package/include/numkong/dot/neonhalf.h +198 -0
  67. package/include/numkong/dot/neonsdot.h +508 -0
  68. package/include/numkong/dot/rvv.h +714 -0
  69. package/include/numkong/dot/rvvbb.h +72 -0
  70. package/include/numkong/dot/rvvbf16.h +123 -0
  71. package/include/numkong/dot/rvvhalf.h +129 -0
  72. package/include/numkong/dot/sapphire.h +141 -0
  73. package/include/numkong/dot/serial.h +838 -0
  74. package/include/numkong/dot/sierra.h +405 -0
  75. package/include/numkong/dot/skylake.h +1084 -0
  76. package/include/numkong/dot/sve.h +379 -0
  77. package/include/numkong/dot/svebfdot.h +74 -0
  78. package/include/numkong/dot/svehalf.h +123 -0
  79. package/include/numkong/dot/v128relaxed.h +1258 -0
  80. package/include/numkong/dot.h +1070 -0
  81. package/include/numkong/dot.hpp +94 -0
  82. package/include/numkong/dots/README.md +496 -0
  83. package/include/numkong/dots/alder.h +114 -0
  84. package/include/numkong/dots/genoa.h +94 -0
  85. package/include/numkong/dots/haswell.h +295 -0
  86. package/include/numkong/dots/icelake.h +171 -0
  87. package/include/numkong/dots/neon.h +120 -0
  88. package/include/numkong/dots/neonbfdot.h +58 -0
  89. package/include/numkong/dots/neonfhm.h +94 -0
  90. package/include/numkong/dots/neonhalf.h +57 -0
  91. package/include/numkong/dots/neonsdot.h +108 -0
  92. package/include/numkong/dots/rvv.h +2486 -0
  93. package/include/numkong/dots/sapphireamx.h +3973 -0
  94. package/include/numkong/dots/serial.h +2844 -0
  95. package/include/numkong/dots/sierra.h +97 -0
  96. package/include/numkong/dots/skylake.h +196 -0
  97. package/include/numkong/dots/sme.h +5372 -0
  98. package/include/numkong/dots/smebi32.h +461 -0
  99. package/include/numkong/dots/smef64.h +1318 -0
  100. package/include/numkong/dots/smehalf.h +47 -0
  101. package/include/numkong/dots/v128relaxed.h +294 -0
  102. package/include/numkong/dots.h +2804 -0
  103. package/include/numkong/dots.hpp +639 -0
  104. package/include/numkong/each/README.md +469 -0
  105. package/include/numkong/each/haswell.h +1658 -0
  106. package/include/numkong/each/icelake.h +272 -0
  107. package/include/numkong/each/neon.h +1104 -0
  108. package/include/numkong/each/neonbfdot.h +212 -0
  109. package/include/numkong/each/neonhalf.h +410 -0
  110. package/include/numkong/each/rvv.h +1121 -0
  111. package/include/numkong/each/sapphire.h +477 -0
  112. package/include/numkong/each/serial.h +260 -0
  113. package/include/numkong/each/skylake.h +1562 -0
  114. package/include/numkong/each.h +2146 -0
  115. package/include/numkong/each.hpp +434 -0
  116. package/include/numkong/geospatial/README.md +147 -0
  117. package/include/numkong/geospatial/haswell.h +593 -0
  118. package/include/numkong/geospatial/neon.h +571 -0
  119. package/include/numkong/geospatial/rvv.h +701 -0
  120. package/include/numkong/geospatial/serial.h +309 -0
  121. package/include/numkong/geospatial/skylake.h +577 -0
  122. package/include/numkong/geospatial/v128relaxed.h +613 -0
  123. package/include/numkong/geospatial.h +453 -0
  124. package/include/numkong/geospatial.hpp +235 -0
  125. package/include/numkong/matrix.hpp +336 -0
  126. package/include/numkong/maxsim/README.md +187 -0
  127. package/include/numkong/maxsim/alder.h +511 -0
  128. package/include/numkong/maxsim/genoa.h +115 -0
  129. package/include/numkong/maxsim/haswell.h +553 -0
  130. package/include/numkong/maxsim/icelake.h +480 -0
  131. package/include/numkong/maxsim/neonsdot.h +394 -0
  132. package/include/numkong/maxsim/sapphireamx.h +877 -0
  133. package/include/numkong/maxsim/serial.h +490 -0
  134. package/include/numkong/maxsim/sme.h +929 -0
  135. package/include/numkong/maxsim/v128relaxed.h +280 -0
  136. package/include/numkong/maxsim.h +571 -0
  137. package/include/numkong/maxsim.hpp +133 -0
  138. package/include/numkong/mesh/README.md +227 -0
  139. package/include/numkong/mesh/haswell.h +2235 -0
  140. package/include/numkong/mesh/neon.h +1329 -0
  141. package/include/numkong/mesh/neonbfdot.h +842 -0
  142. package/include/numkong/mesh/neonhalf.h +616 -0
  143. package/include/numkong/mesh/rvv.h +916 -0
  144. package/include/numkong/mesh/serial.h +742 -0
  145. package/include/numkong/mesh/skylake.h +1135 -0
  146. package/include/numkong/mesh/v128relaxed.h +1052 -0
  147. package/include/numkong/mesh.h +652 -0
  148. package/include/numkong/mesh.hpp +762 -0
  149. package/include/numkong/numkong.h +78 -0
  150. package/include/numkong/numkong.hpp +57 -0
  151. package/include/numkong/probability/README.md +173 -0
  152. package/include/numkong/probability/haswell.h +267 -0
  153. package/include/numkong/probability/neon.h +225 -0
  154. package/include/numkong/probability/rvv.h +409 -0
  155. package/include/numkong/probability/serial.h +169 -0
  156. package/include/numkong/probability/skylake.h +324 -0
  157. package/include/numkong/probability.h +383 -0
  158. package/include/numkong/probability.hpp +120 -0
  159. package/include/numkong/random.h +50 -0
  160. package/include/numkong/random.hpp +285 -0
  161. package/include/numkong/reduce/README.md +547 -0
  162. package/include/numkong/reduce/alder.h +632 -0
  163. package/include/numkong/reduce/genoa.h +201 -0
  164. package/include/numkong/reduce/haswell.h +3783 -0
  165. package/include/numkong/reduce/icelake.h +549 -0
  166. package/include/numkong/reduce/neon.h +3841 -0
  167. package/include/numkong/reduce/neonbfdot.h +353 -0
  168. package/include/numkong/reduce/neonfhm.h +665 -0
  169. package/include/numkong/reduce/neonhalf.h +157 -0
  170. package/include/numkong/reduce/neonsdot.h +357 -0
  171. package/include/numkong/reduce/rvv.h +3407 -0
  172. package/include/numkong/reduce/serial.h +757 -0
  173. package/include/numkong/reduce/sierra.h +338 -0
  174. package/include/numkong/reduce/skylake.h +3792 -0
  175. package/include/numkong/reduce/v128relaxed.h +2302 -0
  176. package/include/numkong/reduce.h +1597 -0
  177. package/include/numkong/reduce.hpp +633 -0
  178. package/include/numkong/scalar/README.md +89 -0
  179. package/include/numkong/scalar/haswell.h +113 -0
  180. package/include/numkong/scalar/neon.h +122 -0
  181. package/include/numkong/scalar/neonhalf.h +70 -0
  182. package/include/numkong/scalar/rvv.h +211 -0
  183. package/include/numkong/scalar/sapphire.h +63 -0
  184. package/include/numkong/scalar/serial.h +332 -0
  185. package/include/numkong/scalar/v128relaxed.h +56 -0
  186. package/include/numkong/scalar.h +683 -0
  187. package/include/numkong/set/README.md +179 -0
  188. package/include/numkong/set/haswell.h +334 -0
  189. package/include/numkong/set/icelake.h +485 -0
  190. package/include/numkong/set/neon.h +364 -0
  191. package/include/numkong/set/rvv.h +226 -0
  192. package/include/numkong/set/rvvbb.h +117 -0
  193. package/include/numkong/set/serial.h +174 -0
  194. package/include/numkong/set/sve.h +185 -0
  195. package/include/numkong/set/v128relaxed.h +240 -0
  196. package/include/numkong/set.h +457 -0
  197. package/include/numkong/set.hpp +114 -0
  198. package/include/numkong/sets/README.md +149 -0
  199. package/include/numkong/sets/haswell.h +63 -0
  200. package/include/numkong/sets/icelake.h +66 -0
  201. package/include/numkong/sets/neon.h +61 -0
  202. package/include/numkong/sets/serial.h +43 -0
  203. package/include/numkong/sets/smebi32.h +1099 -0
  204. package/include/numkong/sets/v128relaxed.h +58 -0
  205. package/include/numkong/sets.h +339 -0
  206. package/include/numkong/sparse/README.md +156 -0
  207. package/include/numkong/sparse/icelake.h +463 -0
  208. package/include/numkong/sparse/neon.h +288 -0
  209. package/include/numkong/sparse/serial.h +117 -0
  210. package/include/numkong/sparse/sve2.h +507 -0
  211. package/include/numkong/sparse/turin.h +322 -0
  212. package/include/numkong/sparse.h +363 -0
  213. package/include/numkong/sparse.hpp +113 -0
  214. package/include/numkong/spatial/README.md +435 -0
  215. package/include/numkong/spatial/alder.h +607 -0
  216. package/include/numkong/spatial/genoa.h +290 -0
  217. package/include/numkong/spatial/haswell.h +960 -0
  218. package/include/numkong/spatial/icelake.h +586 -0
  219. package/include/numkong/spatial/neon.h +773 -0
  220. package/include/numkong/spatial/neonbfdot.h +165 -0
  221. package/include/numkong/spatial/neonhalf.h +118 -0
  222. package/include/numkong/spatial/neonsdot.h +261 -0
  223. package/include/numkong/spatial/rvv.h +984 -0
  224. package/include/numkong/spatial/rvvbf16.h +123 -0
  225. package/include/numkong/spatial/rvvhalf.h +117 -0
  226. package/include/numkong/spatial/sapphire.h +343 -0
  227. package/include/numkong/spatial/serial.h +346 -0
  228. package/include/numkong/spatial/sierra.h +323 -0
  229. package/include/numkong/spatial/skylake.h +606 -0
  230. package/include/numkong/spatial/sve.h +224 -0
  231. package/include/numkong/spatial/svebfdot.h +122 -0
  232. package/include/numkong/spatial/svehalf.h +109 -0
  233. package/include/numkong/spatial/v128relaxed.h +717 -0
  234. package/include/numkong/spatial.h +1425 -0
  235. package/include/numkong/spatial.hpp +183 -0
  236. package/include/numkong/spatials/README.md +580 -0
  237. package/include/numkong/spatials/alder.h +94 -0
  238. package/include/numkong/spatials/genoa.h +94 -0
  239. package/include/numkong/spatials/haswell.h +219 -0
  240. package/include/numkong/spatials/icelake.h +113 -0
  241. package/include/numkong/spatials/neon.h +109 -0
  242. package/include/numkong/spatials/neonbfdot.h +60 -0
  243. package/include/numkong/spatials/neonfhm.h +92 -0
  244. package/include/numkong/spatials/neonhalf.h +58 -0
  245. package/include/numkong/spatials/neonsdot.h +109 -0
  246. package/include/numkong/spatials/rvv.h +1960 -0
  247. package/include/numkong/spatials/sapphireamx.h +1149 -0
  248. package/include/numkong/spatials/serial.h +226 -0
  249. package/include/numkong/spatials/sierra.h +96 -0
  250. package/include/numkong/spatials/skylake.h +184 -0
  251. package/include/numkong/spatials/sme.h +1901 -0
  252. package/include/numkong/spatials/smef64.h +465 -0
  253. package/include/numkong/spatials/v128relaxed.h +240 -0
  254. package/include/numkong/spatials.h +3021 -0
  255. package/include/numkong/spatials.hpp +508 -0
  256. package/include/numkong/tensor.hpp +1592 -0
  257. package/include/numkong/trigonometry/README.md +184 -0
  258. package/include/numkong/trigonometry/haswell.h +652 -0
  259. package/include/numkong/trigonometry/neon.h +639 -0
  260. package/include/numkong/trigonometry/rvv.h +699 -0
  261. package/include/numkong/trigonometry/serial.h +703 -0
  262. package/include/numkong/trigonometry/skylake.h +721 -0
  263. package/include/numkong/trigonometry/v128relaxed.h +666 -0
  264. package/include/numkong/trigonometry.h +467 -0
  265. package/include/numkong/trigonometry.hpp +166 -0
  266. package/include/numkong/types.h +1384 -0
  267. package/include/numkong/types.hpp +5603 -0
  268. package/include/numkong/vector.hpp +698 -0
  269. package/javascript/README.md +246 -0
  270. package/javascript/dist/cjs/numkong-wasm.d.ts +166 -0
  271. package/javascript/dist/cjs/numkong-wasm.js +617 -0
  272. package/javascript/dist/cjs/numkong.d.ts +343 -0
  273. package/javascript/dist/cjs/numkong.js +523 -0
  274. package/javascript/dist/cjs/package.json +3 -0
  275. package/javascript/dist/cjs/types.d.ts +284 -0
  276. package/javascript/dist/cjs/types.js +653 -0
  277. package/javascript/dist/esm/numkong-wasm.d.ts +166 -0
  278. package/javascript/dist/esm/numkong-wasm.js +595 -0
  279. package/javascript/dist/esm/numkong.d.ts +343 -0
  280. package/javascript/dist/esm/numkong.js +452 -0
  281. package/javascript/dist/esm/package.json +3 -0
  282. package/javascript/dist/esm/types.d.ts +284 -0
  283. package/javascript/dist/esm/types.js +630 -0
  284. package/javascript/dist-package-cjs.json +3 -0
  285. package/javascript/dist-package-esm.json +3 -0
  286. package/javascript/node-gyp-build.d.ts +1 -0
  287. package/javascript/numkong-wasm.ts +756 -0
  288. package/javascript/numkong.c +689 -0
  289. package/javascript/numkong.ts +575 -0
  290. package/javascript/tsconfig-base.json +39 -0
  291. package/javascript/tsconfig-cjs.json +8 -0
  292. package/javascript/tsconfig-esm.json +8 -0
  293. package/javascript/types.ts +674 -0
  294. package/package.json +87 -0
package/c/dispatch.h ADDED
@@ -0,0 +1,512 @@
1
+ /**
2
+ * @brief Common Definitions for Dispatch Files.
3
+ * @file c/dispatch.h
4
+ * @author Ash Vardanian
5
+ * @date February 3, 2026
6
+ */
7
+ #ifndef NK_DISPATCH_H
8
+ #define NK_DISPATCH_H
9
+
10
+ #define NK_DYNAMIC_DISPATCH 1 // set before <numkong/numkong.h> is included further down; presumably enables run-time kernel dispatch - TODO confirm in the numkong headers
11
+ #define NK_NATIVE_F16 0 // NOTE(review): presumably opts out of the compiler's native f16 type for these dispatch files - confirm in the numkong headers
12
+ #define NK_NATIVE_BF16 0 // NOTE(review): presumably opts out of the compiler's native bf16 type for these dispatch files - confirm in the numkong headers
13
+
14
+ /* NK_TARGET_* defines are set by the build system:
15
+ * - Python: setup.py
16
+ * - Rust: build.rs
17
+ * - Node.js: binding.gyp
18
+ * - CMake: CMakeLists.txt
19
+ *
20
+ * For header-only usage without a build system, types.h provides
21
+ * compiler-intrinsic-based fallback detection.
22
+ *
23
+ * OS/compiler capabilities summary:
24
+ * - Linux: everything available in GCC 12+ and Clang 16+.
25
+ * - FreeBSD: same as Linux, except AMX (no kernel tile permission support).
26
+ * - Windows - MSVC: Haswell/Skylake/Icelake, plus Sapphire FP16 (MSVC 2022 17.2+).
27
+ * - macOS - Apple Clang: only Arm NEON and x86 AVX2 Haswell extensions.
28
+ */
29
+
30
+ #include <numkong/numkong.h>
31
+
32
+ #ifdef __cplusplus
33
+ extern "C" {
34
+ #endif
35
+
36
+ // Full definition of the dispatch table type (same structure as in numkong.c)
37
+ typedef struct {
38
+ // Dot products
39
+ nk_metric_dense_punned_t dot_f64c;
40
+ nk_metric_dense_punned_t dot_f32c;
41
+ nk_metric_dense_punned_t dot_bf16c;
42
+ nk_metric_dense_punned_t dot_f16c;
43
+ nk_metric_dense_punned_t dot_f64;
44
+ nk_metric_dense_punned_t dot_f32;
45
+ nk_metric_dense_punned_t dot_bf16;
46
+ nk_metric_dense_punned_t dot_f16;
47
+ nk_metric_dense_punned_t dot_e5m2;
48
+ nk_metric_dense_punned_t dot_e4m3;
49
+ nk_metric_dense_punned_t dot_e3m2;
50
+ nk_metric_dense_punned_t dot_e2m3;
51
+ nk_metric_dense_punned_t dot_i8;
52
+ nk_metric_dense_punned_t dot_u8;
53
+ nk_metric_dense_punned_t dot_i4;
54
+ nk_metric_dense_punned_t dot_u4;
55
+ nk_metric_dense_punned_t dot_u1;
56
+ nk_metric_dense_punned_t vdot_f64c;
57
+ nk_metric_dense_punned_t vdot_f32c;
58
+ nk_metric_dense_punned_t vdot_bf16c;
59
+ nk_metric_dense_punned_t vdot_f16c;
60
+ // Angular distances
61
+ nk_metric_dense_punned_t angular_f64;
62
+ nk_metric_dense_punned_t angular_f32;
63
+ nk_metric_dense_punned_t angular_bf16;
64
+ nk_metric_dense_punned_t angular_f16;
65
+ nk_metric_dense_punned_t angular_e5m2;
66
+ nk_metric_dense_punned_t angular_e4m3;
67
+ nk_metric_dense_punned_t angular_e3m2;
68
+ nk_metric_dense_punned_t angular_e2m3;
69
+ nk_metric_dense_punned_t angular_i8;
70
+ nk_metric_dense_punned_t angular_i4;
71
+ nk_metric_dense_punned_t angular_u8;
72
+ nk_metric_dense_punned_t angular_u4;
73
+ // Euclidean distances
74
+ nk_metric_dense_punned_t euclidean_f64;
75
+ nk_metric_dense_punned_t euclidean_f32;
76
+ nk_metric_dense_punned_t euclidean_bf16;
77
+ nk_metric_dense_punned_t euclidean_f16;
78
+ nk_metric_dense_punned_t euclidean_e5m2;
79
+ nk_metric_dense_punned_t euclidean_e4m3;
80
+ nk_metric_dense_punned_t euclidean_e3m2;
81
+ nk_metric_dense_punned_t euclidean_e2m3;
82
+ nk_metric_dense_punned_t euclidean_i8;
83
+ nk_metric_dense_punned_t euclidean_i4;
84
+ nk_metric_dense_punned_t euclidean_u8;
85
+ nk_metric_dense_punned_t euclidean_u4;
86
+ // Squared Euclidean distances
87
+ nk_metric_dense_punned_t sqeuclidean_f64;
88
+ nk_metric_dense_punned_t sqeuclidean_f32;
89
+ nk_metric_dense_punned_t sqeuclidean_bf16;
90
+ nk_metric_dense_punned_t sqeuclidean_f16;
91
+ nk_metric_dense_punned_t sqeuclidean_e5m2;
92
+ nk_metric_dense_punned_t sqeuclidean_e4m3;
93
+ nk_metric_dense_punned_t sqeuclidean_e3m2;
94
+ nk_metric_dense_punned_t sqeuclidean_e2m3;
95
+ nk_metric_dense_punned_t sqeuclidean_i8;
96
+ nk_metric_dense_punned_t sqeuclidean_i4;
97
+ nk_metric_dense_punned_t sqeuclidean_u8;
98
+ nk_metric_dense_punned_t sqeuclidean_u4;
99
+ // Binary distances
100
+ nk_metric_dense_punned_t hamming_u8;
101
+ nk_metric_dense_punned_t hamming_u1;
102
+ nk_metric_dense_punned_t jaccard_u32;
103
+ nk_metric_dense_punned_t jaccard_u16;
104
+ nk_metric_dense_punned_t jaccard_u1;
105
+ // Curved spaces
106
+ nk_metric_curved_punned_t bilinear_f64c;
107
+ nk_metric_curved_punned_t bilinear_f32c;
108
+ nk_metric_curved_punned_t bilinear_bf16c;
109
+ nk_metric_curved_punned_t bilinear_f16c;
110
+ nk_metric_curved_punned_t bilinear_f64;
111
+ nk_metric_curved_punned_t bilinear_f32;
112
+ nk_metric_curved_punned_t bilinear_bf16;
113
+ nk_metric_curved_punned_t bilinear_f16;
114
+ nk_metric_curved_punned_t mahalanobis_f64;
115
+ nk_metric_curved_punned_t mahalanobis_f32;
116
+ nk_metric_curved_punned_t mahalanobis_bf16;
117
+ nk_metric_curved_punned_t mahalanobis_f16;
118
+ // Geospatial distances
119
+ nk_metric_geospatial_punned_t haversine_f64;
120
+ nk_metric_geospatial_punned_t haversine_f32;
121
+ nk_metric_geospatial_punned_t vincenty_f64;
122
+ nk_metric_geospatial_punned_t vincenty_f32;
123
+ // Probability distributions
124
+ nk_metric_dense_punned_t kld_f64;
125
+ nk_metric_dense_punned_t kld_f32;
126
+ nk_metric_dense_punned_t kld_bf16;
127
+ nk_metric_dense_punned_t kld_f16;
128
+ nk_metric_dense_punned_t jsd_f64;
129
+ nk_metric_dense_punned_t jsd_f32;
130
+ nk_metric_dense_punned_t jsd_bf16;
131
+ nk_metric_dense_punned_t jsd_f16;
132
+ // Mesh alignment
133
+ nk_metric_mesh_punned_t rmsd_f64;
134
+ nk_metric_mesh_punned_t rmsd_f32;
135
+ nk_metric_mesh_punned_t rmsd_bf16;
136
+ nk_metric_mesh_punned_t rmsd_f16;
137
+ nk_metric_mesh_punned_t kabsch_f64;
138
+ nk_metric_mesh_punned_t kabsch_f32;
139
+ nk_metric_mesh_punned_t kabsch_bf16;
140
+ nk_metric_mesh_punned_t kabsch_f16;
141
+ nk_metric_mesh_punned_t umeyama_f64;
142
+ nk_metric_mesh_punned_t umeyama_f32;
143
+ nk_metric_mesh_punned_t umeyama_bf16;
144
+ nk_metric_mesh_punned_t umeyama_f16;
145
+ // Sparse intersections
146
+ nk_sparse_intersect_punned_t sparse_intersect_u64;
147
+ nk_sparse_intersect_punned_t sparse_intersect_u32;
148
+ nk_sparse_intersect_punned_t sparse_intersect_u16;
149
+ // Sparse dot products
150
+ nk_sparse_dot_punned_t sparse_dot_u32f32;
151
+ nk_sparse_dot_punned_t sparse_dot_u16bf16;
152
+ // Element-wise scale
153
+ nk_each_scale_punned_t each_scale_f64c;
154
+ nk_each_scale_punned_t each_scale_f32c;
155
+ nk_each_scale_punned_t each_scale_f64;
156
+ nk_each_scale_punned_t each_scale_f32;
157
+ nk_each_scale_punned_t each_scale_bf16;
158
+ nk_each_scale_punned_t each_scale_f16;
159
+ nk_each_scale_punned_t each_scale_e5m2;
160
+ nk_each_scale_punned_t each_scale_e4m3;
161
+ nk_each_scale_punned_t each_scale_e3m2;
162
+ nk_each_scale_punned_t each_scale_e2m3;
163
+ nk_each_scale_punned_t each_scale_i64;
164
+ nk_each_scale_punned_t each_scale_i32;
165
+ nk_each_scale_punned_t each_scale_i16;
166
+ nk_each_scale_punned_t each_scale_i8;
167
+ nk_each_scale_punned_t each_scale_u64;
168
+ nk_each_scale_punned_t each_scale_u32;
169
+ nk_each_scale_punned_t each_scale_u16;
170
+ nk_each_scale_punned_t each_scale_u8;
171
+ // Element-wise sum
172
+ nk_each_sum_punned_t each_sum_f64c;
173
+ nk_each_sum_punned_t each_sum_f32c;
174
+ nk_each_sum_punned_t each_sum_f64;
175
+ nk_each_sum_punned_t each_sum_f32;
176
+ nk_each_sum_punned_t each_sum_bf16;
177
+ nk_each_sum_punned_t each_sum_f16;
178
+ nk_each_sum_punned_t each_sum_e5m2;
179
+ nk_each_sum_punned_t each_sum_e4m3;
180
+ nk_each_sum_punned_t each_sum_e3m2;
181
+ nk_each_sum_punned_t each_sum_e2m3;
182
+ nk_each_sum_punned_t each_sum_i64;
183
+ nk_each_sum_punned_t each_sum_i32;
184
+ nk_each_sum_punned_t each_sum_i16;
185
+ nk_each_sum_punned_t each_sum_i8;
186
+ nk_each_sum_punned_t each_sum_u64;
187
+ nk_each_sum_punned_t each_sum_u32;
188
+ nk_each_sum_punned_t each_sum_u16;
189
+ nk_each_sum_punned_t each_sum_u8;
190
+ // Element-wise blend
191
+ nk_each_blend_punned_t each_blend_f64c;
192
+ nk_each_blend_punned_t each_blend_f32c;
193
+ nk_each_blend_punned_t each_blend_f64;
194
+ nk_each_blend_punned_t each_blend_f32;
195
+ nk_each_blend_punned_t each_blend_bf16;
196
+ nk_each_blend_punned_t each_blend_f16;
197
+ nk_each_blend_punned_t each_blend_e5m2;
198
+ nk_each_blend_punned_t each_blend_e4m3;
199
+ nk_each_blend_punned_t each_blend_e3m2;
200
+ nk_each_blend_punned_t each_blend_e2m3;
201
+ nk_each_blend_punned_t each_blend_i64;
202
+ nk_each_blend_punned_t each_blend_i32;
203
+ nk_each_blend_punned_t each_blend_i16;
204
+ nk_each_blend_punned_t each_blend_i8;
205
+ nk_each_blend_punned_t each_blend_u64;
206
+ nk_each_blend_punned_t each_blend_u32;
207
+ nk_each_blend_punned_t each_blend_u16;
208
+ nk_each_blend_punned_t each_blend_u8;
209
+ // Element-wise FMA
210
+ nk_each_fma_punned_t each_fma_f64c;
211
+ nk_each_fma_punned_t each_fma_f32c;
212
+ nk_each_fma_punned_t each_fma_f64;
213
+ nk_each_fma_punned_t each_fma_f32;
214
+ nk_each_fma_punned_t each_fma_bf16;
215
+ nk_each_fma_punned_t each_fma_f16;
216
+ nk_each_fma_punned_t each_fma_e5m2;
217
+ nk_each_fma_punned_t each_fma_e4m3;
218
+ nk_each_fma_punned_t each_fma_e3m2;
219
+ nk_each_fma_punned_t each_fma_e2m3;
220
+ nk_each_fma_punned_t each_fma_i64;
221
+ nk_each_fma_punned_t each_fma_i32;
222
+ nk_each_fma_punned_t each_fma_i16;
223
+ nk_each_fma_punned_t each_fma_i8;
224
+ nk_each_fma_punned_t each_fma_u64;
225
+ nk_each_fma_punned_t each_fma_u32;
226
+ nk_each_fma_punned_t each_fma_u16;
227
+ nk_each_fma_punned_t each_fma_u8;
228
+ // Trigonometry
229
+ nk_kernel_trigonometry_punned_t each_sin_f64;
230
+ nk_kernel_trigonometry_punned_t each_sin_f32;
231
+ nk_kernel_trigonometry_punned_t each_sin_f16;
232
+ nk_kernel_trigonometry_punned_t each_cos_f64;
233
+ nk_kernel_trigonometry_punned_t each_cos_f32;
234
+ nk_kernel_trigonometry_punned_t each_cos_f16;
235
+ nk_kernel_trigonometry_punned_t each_atan_f64;
236
+ nk_kernel_trigonometry_punned_t each_atan_f32;
237
+ nk_kernel_trigonometry_punned_t each_atan_f16;
238
+ // Reduce moments (sum + sum-of-squares)
239
+ nk_kernel_reduce_moments_punned_t reduce_moments_f64;
240
+ nk_kernel_reduce_moments_punned_t reduce_moments_f32;
241
+ nk_kernel_reduce_moments_punned_t reduce_moments_bf16;
242
+ nk_kernel_reduce_moments_punned_t reduce_moments_f16;
243
+ nk_kernel_reduce_moments_punned_t reduce_moments_e5m2;
244
+ nk_kernel_reduce_moments_punned_t reduce_moments_e4m3;
245
+ nk_kernel_reduce_moments_punned_t reduce_moments_e3m2;
246
+ nk_kernel_reduce_moments_punned_t reduce_moments_e2m3;
247
+ nk_kernel_reduce_moments_punned_t reduce_moments_i64;
248
+ nk_kernel_reduce_moments_punned_t reduce_moments_i32;
249
+ nk_kernel_reduce_moments_punned_t reduce_moments_i16;
250
+ nk_kernel_reduce_moments_punned_t reduce_moments_i8;
251
+ nk_kernel_reduce_moments_punned_t reduce_moments_i4;
252
+ nk_kernel_reduce_moments_punned_t reduce_moments_u64;
253
+ nk_kernel_reduce_moments_punned_t reduce_moments_u32;
254
+ nk_kernel_reduce_moments_punned_t reduce_moments_u16;
255
+ nk_kernel_reduce_moments_punned_t reduce_moments_u8;
256
+ nk_kernel_reduce_moments_punned_t reduce_moments_u4;
257
+ nk_kernel_reduce_moments_punned_t reduce_moments_u1;
258
+ // Reduce minmax (min + argmin + max + argmax)
259
+ nk_kernel_reduce_minmax_punned_t reduce_minmax_f64;
260
+ nk_kernel_reduce_minmax_punned_t reduce_minmax_f32;
261
+ nk_kernel_reduce_minmax_punned_t reduce_minmax_bf16;
262
+ nk_kernel_reduce_minmax_punned_t reduce_minmax_f16;
263
+ nk_kernel_reduce_minmax_punned_t reduce_minmax_e5m2;
264
+ nk_kernel_reduce_minmax_punned_t reduce_minmax_e4m3;
265
+ nk_kernel_reduce_minmax_punned_t reduce_minmax_e3m2;
266
+ nk_kernel_reduce_minmax_punned_t reduce_minmax_e2m3;
267
+ nk_kernel_reduce_minmax_punned_t reduce_minmax_i64;
268
+ nk_kernel_reduce_minmax_punned_t reduce_minmax_i32;
269
+ nk_kernel_reduce_minmax_punned_t reduce_minmax_i16;
270
+ nk_kernel_reduce_minmax_punned_t reduce_minmax_i8;
271
+ nk_kernel_reduce_minmax_punned_t reduce_minmax_i4;
272
+ nk_kernel_reduce_minmax_punned_t reduce_minmax_u64;
273
+ nk_kernel_reduce_minmax_punned_t reduce_minmax_u32;
274
+ nk_kernel_reduce_minmax_punned_t reduce_minmax_u16;
275
+ nk_kernel_reduce_minmax_punned_t reduce_minmax_u8;
276
+ nk_kernel_reduce_minmax_punned_t reduce_minmax_u4;
277
+ nk_kernel_reduce_minmax_punned_t reduce_minmax_u1;
278
+ // Dots packed size
279
+ nk_dots_packed_size_punned_t dots_packed_size_f64;
280
+ nk_dots_packed_size_punned_t dots_packed_size_f32;
281
+ nk_dots_packed_size_punned_t dots_packed_size_bf16;
282
+ nk_dots_packed_size_punned_t dots_packed_size_f16;
283
+ nk_dots_packed_size_punned_t dots_packed_size_e5m2;
284
+ nk_dots_packed_size_punned_t dots_packed_size_e4m3;
285
+ nk_dots_packed_size_punned_t dots_packed_size_e3m2;
286
+ nk_dots_packed_size_punned_t dots_packed_size_e2m3;
287
+ nk_dots_packed_size_punned_t dots_packed_size_i8;
288
+ nk_dots_packed_size_punned_t dots_packed_size_i4;
289
+ nk_dots_packed_size_punned_t dots_packed_size_u8;
290
+ nk_dots_packed_size_punned_t dots_packed_size_u4;
291
+ nk_dots_packed_size_punned_t dots_packed_size_u1;
292
+ // Dots pack
293
+ nk_dots_pack_punned_t dots_pack_f64;
294
+ nk_dots_pack_punned_t dots_pack_f32;
295
+ nk_dots_pack_punned_t dots_pack_bf16;
296
+ nk_dots_pack_punned_t dots_pack_f16;
297
+ nk_dots_pack_punned_t dots_pack_e5m2;
298
+ nk_dots_pack_punned_t dots_pack_e4m3;
299
+ nk_dots_pack_punned_t dots_pack_e3m2;
300
+ nk_dots_pack_punned_t dots_pack_e2m3;
301
+ nk_dots_pack_punned_t dots_pack_i8;
302
+ nk_dots_pack_punned_t dots_pack_i4;
303
+ nk_dots_pack_punned_t dots_pack_u8;
304
+ nk_dots_pack_punned_t dots_pack_u4;
305
+ nk_dots_pack_punned_t dots_pack_u1;
306
+ // Dots packed
307
+ nk_dots_packed_punned_t dots_packed_f64;
308
+ nk_dots_packed_punned_t dots_packed_f32;
309
+ nk_dots_packed_punned_t dots_packed_bf16;
310
+ nk_dots_packed_punned_t dots_packed_f16;
311
+ nk_dots_packed_punned_t dots_packed_e5m2;
312
+ nk_dots_packed_punned_t dots_packed_e4m3;
313
+ nk_dots_packed_punned_t dots_packed_e3m2;
314
+ nk_dots_packed_punned_t dots_packed_e2m3;
315
+ nk_dots_packed_punned_t dots_packed_i8;
316
+ nk_dots_packed_punned_t dots_packed_i4;
317
+ nk_dots_packed_punned_t dots_packed_u8;
318
+ nk_dots_packed_punned_t dots_packed_u4;
319
+ nk_dots_packed_punned_t dots_packed_u1;
320
+ // Sets packed
321
+ nk_hammings_packed_punned_t hammings_packed_u1;
322
+ nk_jaccards_packed_punned_t jaccards_packed_u1;
323
+ // Dots symmetric
324
+ nk_dots_symmetric_punned_t dots_symmetric_f64;
325
+ nk_dots_symmetric_punned_t dots_symmetric_f32;
326
+ nk_dots_symmetric_punned_t dots_symmetric_bf16;
327
+ nk_dots_symmetric_punned_t dots_symmetric_f16;
328
+ nk_dots_symmetric_punned_t dots_symmetric_e5m2;
329
+ nk_dots_symmetric_punned_t dots_symmetric_e4m3;
330
+ nk_dots_symmetric_punned_t dots_symmetric_e3m2;
331
+ nk_dots_symmetric_punned_t dots_symmetric_e2m3;
332
+ nk_dots_symmetric_punned_t dots_symmetric_i8;
333
+ nk_dots_symmetric_punned_t dots_symmetric_i4;
334
+ nk_dots_symmetric_punned_t dots_symmetric_u8;
335
+ nk_dots_symmetric_punned_t dots_symmetric_u4;
336
+ nk_dots_symmetric_punned_t dots_symmetric_u1;
337
+ // Sets symmetric
338
+ nk_hammings_symmetric_punned_t hammings_symmetric_u1;
339
+ nk_jaccards_symmetric_punned_t jaccards_symmetric_u1;
340
+ // Angulars packed
341
+ nk_angulars_packed_punned_t angulars_packed_f64;
342
+ nk_angulars_packed_punned_t angulars_packed_f32;
343
+ nk_angulars_packed_punned_t angulars_packed_bf16;
344
+ nk_angulars_packed_punned_t angulars_packed_f16;
345
+ nk_angulars_packed_punned_t angulars_packed_e5m2;
346
+ nk_angulars_packed_punned_t angulars_packed_e4m3;
347
+ nk_angulars_packed_punned_t angulars_packed_e3m2;
348
+ nk_angulars_packed_punned_t angulars_packed_e2m3;
349
+ nk_angulars_packed_punned_t angulars_packed_i8;
350
+ nk_angulars_packed_punned_t angulars_packed_i4;
351
+ nk_angulars_packed_punned_t angulars_packed_u8;
352
+ nk_angulars_packed_punned_t angulars_packed_u4;
353
+ // Angulars symmetric
354
+ nk_angulars_symmetric_punned_t angulars_symmetric_f64;
355
+ nk_angulars_symmetric_punned_t angulars_symmetric_f32;
356
+ nk_angulars_symmetric_punned_t angulars_symmetric_bf16;
357
+ nk_angulars_symmetric_punned_t angulars_symmetric_f16;
358
+ nk_angulars_symmetric_punned_t angulars_symmetric_e5m2;
359
+ nk_angulars_symmetric_punned_t angulars_symmetric_e4m3;
360
+ nk_angulars_symmetric_punned_t angulars_symmetric_e3m2;
361
+ nk_angulars_symmetric_punned_t angulars_symmetric_e2m3;
362
+ nk_angulars_symmetric_punned_t angulars_symmetric_i8;
363
+ nk_angulars_symmetric_punned_t angulars_symmetric_i4;
364
+ nk_angulars_symmetric_punned_t angulars_symmetric_u8;
365
+ nk_angulars_symmetric_punned_t angulars_symmetric_u4;
366
+ // Euclideans packed
367
+ nk_euclideans_packed_punned_t euclideans_packed_f64;
368
+ nk_euclideans_packed_punned_t euclideans_packed_f32;
369
+ nk_euclideans_packed_punned_t euclideans_packed_bf16;
370
+ nk_euclideans_packed_punned_t euclideans_packed_f16;
371
+ nk_euclideans_packed_punned_t euclideans_packed_e5m2;
372
+ nk_euclideans_packed_punned_t euclideans_packed_e4m3;
373
+ nk_euclideans_packed_punned_t euclideans_packed_e3m2;
374
+ nk_euclideans_packed_punned_t euclideans_packed_e2m3;
375
+ nk_euclideans_packed_punned_t euclideans_packed_i8;
376
+ nk_euclideans_packed_punned_t euclideans_packed_i4;
377
+ nk_euclideans_packed_punned_t euclideans_packed_u8;
378
+ nk_euclideans_packed_punned_t euclideans_packed_u4;
379
+ // Euclideans symmetric
380
+ nk_euclideans_symmetric_punned_t euclideans_symmetric_f64;
381
+ nk_euclideans_symmetric_punned_t euclideans_symmetric_f32;
382
+ nk_euclideans_symmetric_punned_t euclideans_symmetric_bf16;
383
+ nk_euclideans_symmetric_punned_t euclideans_symmetric_f16;
384
+ nk_euclideans_symmetric_punned_t euclideans_symmetric_e5m2;
385
+ nk_euclideans_symmetric_punned_t euclideans_symmetric_e4m3;
386
+ nk_euclideans_symmetric_punned_t euclideans_symmetric_e3m2;
387
+ nk_euclideans_symmetric_punned_t euclideans_symmetric_e2m3;
388
+ nk_euclideans_symmetric_punned_t euclideans_symmetric_i8;
389
+ nk_euclideans_symmetric_punned_t euclideans_symmetric_i4;
390
+ nk_euclideans_symmetric_punned_t euclideans_symmetric_u8;
391
+ nk_euclideans_symmetric_punned_t euclideans_symmetric_u4;
392
+ // MaxSim packed size
393
+ nk_dots_packed_size_punned_t maxsim_packed_size_f32;
394
+ nk_dots_packed_size_punned_t maxsim_packed_size_bf16;
395
+ nk_dots_packed_size_punned_t maxsim_packed_size_f16;
396
+ // MaxSim pack
397
+ nk_dots_pack_punned_t maxsim_pack_f32;
398
+ nk_dots_pack_punned_t maxsim_pack_bf16;
399
+ nk_dots_pack_punned_t maxsim_pack_f16;
400
+ // MaxSim packed
401
+ nk_maxsim_packed_punned_t maxsim_packed_f32;
402
+ nk_maxsim_packed_punned_t maxsim_packed_bf16;
403
+ nk_maxsim_packed_punned_t maxsim_packed_f16;
404
+ // Type casting
405
+ nk_kernel_cast_punned_t cast;
406
+ // Scalar conversions
407
+ void (*bf16_to_f32)(nk_bf16_t const *, nk_f32_t *);
408
+ void (*f32_to_bf16)(nk_f32_t const *, nk_bf16_t *);
409
+ void (*f16_to_f32)(nk_f16_t const *, nk_f32_t *);
410
+ void (*f32_to_f16)(nk_f32_t const *, nk_f16_t *);
411
+ void (*e5m2_to_f32)(nk_e5m2_t const *, nk_f32_t *);
412
+ void (*f32_to_e5m2)(nk_f32_t const *, nk_e5m2_t *);
413
+ void (*e4m3_to_f32)(nk_e4m3_t const *, nk_f32_t *);
414
+ void (*f32_to_e4m3)(nk_f32_t const *, nk_e4m3_t *);
415
+ void (*e3m2_to_f32)(nk_e3m2_t const *, nk_f32_t *);
416
+ void (*f32_to_e3m2)(nk_f32_t const *, nk_e3m2_t *);
417
+ void (*e2m3_to_f32)(nk_e2m3_t const *, nk_f32_t *);
418
+ void (*f32_to_e2m3)(nk_f32_t const *, nk_e2m3_t *);
419
+ // Scalar math
420
+ nk_f64_t (*f64_sqrt)(nk_f64_t);
421
+ nk_f64_t (*f64_rsqrt)(nk_f64_t);
422
+ nk_f64_t (*f64_fma)(nk_f64_t, nk_f64_t, nk_f64_t);
423
+ nk_f32_t (*f32_sqrt)(nk_f32_t);
424
+ nk_f32_t (*f32_rsqrt)(nk_f32_t);
425
+ nk_f32_t (*f32_fma)(nk_f32_t, nk_f32_t, nk_f32_t);
426
+ nk_f16_t (*f16_sqrt)(nk_f16_t);
427
+ nk_f16_t (*f16_rsqrt)(nk_f16_t);
428
+ nk_f16_t (*f16_fma)(nk_f16_t, nk_f16_t, nk_f16_t);
429
+ // Scalar saturating arithmetic
430
+ nk_i64_t (*i64_saturating_add)(nk_i64_t, nk_i64_t);
431
+ nk_i64_t (*i64_saturating_mul)(nk_i64_t, nk_i64_t);
432
+ nk_i32_t (*i32_saturating_add)(nk_i32_t, nk_i32_t);
433
+ nk_i32_t (*i32_saturating_mul)(nk_i32_t, nk_i32_t);
434
+ nk_i16_t (*i16_saturating_add)(nk_i16_t, nk_i16_t);
435
+ nk_i16_t (*i16_saturating_mul)(nk_i16_t, nk_i16_t);
436
+ nk_i8_t (*i8_saturating_add)(nk_i8_t, nk_i8_t);
437
+ nk_i8_t (*i8_saturating_mul)(nk_i8_t, nk_i8_t);
438
+ nk_i4x2_t (*i4x2_saturating_add)(nk_i4x2_t, nk_i4x2_t);
439
+ nk_i4x2_t (*i4x2_saturating_mul)(nk_i4x2_t, nk_i4x2_t);
440
+ nk_u64_t (*u64_saturating_add)(nk_u64_t, nk_u64_t);
441
+ nk_u64_t (*u64_saturating_mul)(nk_u64_t, nk_u64_t);
442
+ nk_u32_t (*u32_saturating_add)(nk_u32_t, nk_u32_t);
443
+ nk_u32_t (*u32_saturating_mul)(nk_u32_t, nk_u32_t);
444
+ nk_u16_t (*u16_saturating_add)(nk_u16_t, nk_u16_t);
445
+ nk_u16_t (*u16_saturating_mul)(nk_u16_t, nk_u16_t);
446
+ nk_u8_t (*u8_saturating_add)(nk_u8_t, nk_u8_t);
447
+ nk_u8_t (*u8_saturating_mul)(nk_u8_t, nk_u8_t);
448
+ nk_u4x2_t (*u4x2_saturating_add)(nk_u4x2_t, nk_u4x2_t);
449
+ nk_u4x2_t (*u4x2_saturating_mul)(nk_u4x2_t, nk_u4x2_t);
450
+ // Scalar ordering
451
+ int (*bf16_order)(nk_bf16_t, nk_bf16_t);
452
+ int (*f16_order)(nk_f16_t, nk_f16_t);
453
+ int (*e5m2_order)(nk_e5m2_t, nk_e5m2_t);
454
+ int (*e4m3_order)(nk_e4m3_t, nk_e4m3_t);
455
+ int (*e3m2_order)(nk_e3m2_t, nk_e3m2_t);
456
+ int (*e2m3_order)(nk_e2m3_t, nk_e2m3_t);
457
+ } nk_implementations_t;
458
+
459
+ // Global dispatch table: the nk_implementations_t instance shared across translation units; only declared here, the one definition lives in numkong.c
460
+ extern nk_implementations_t nk_dispatch_table;
461
+
462
+ // Error handlers - declared here, defined in numkong.c. Every handler takes untyped (void) pointers plus nk_size_t arguments; all return void except nk_error_packed_size_, which returns nk_size_t. NOTE(review): the signatures appear to mirror the punned kernel types so a handler can stand in for a missing kernel - confirm in numkong.c
463
+ extern void nk_error_dense_(void const *, void const *, nk_size_t, void *);
464
+ extern void nk_error_sparse_intersect_(void const *, void const *, nk_size_t, nk_size_t, void *, nk_size_t *);
465
+ extern void nk_error_sparse_dot_(void const *, void const *, void const *, void const *, nk_size_t, nk_size_t, void *);
466
+ extern void nk_error_curved_(void const *, void const *, void const *, nk_size_t, void *);
467
+ extern void nk_error_geospatial_(void const *, void const *, void const *, void const *, nk_size_t, void *);
468
+ extern void nk_error_each_fma_(void const *, void const *, void const *, nk_size_t, void const *, void const *, void *);
469
+ extern void nk_error_each_blend_(void const *, void const *, nk_size_t, void const *, void const *, void *);
470
+ extern void nk_error_each_scale_(void const *, nk_size_t, void const *, void const *, void *);
471
+ extern void nk_error_each_sum_(void const *, void const *, nk_size_t, void *);
472
+ extern void nk_error_trigonometry_(void const *, nk_size_t, void *);
473
+ extern void nk_error_mesh_(void const *, void const *, nk_size_t, void *, void *, void *, void *, void *);
474
+ extern void nk_error_reduce_moments_(void const *, nk_size_t, nk_size_t, void *, void *);
475
+ extern void nk_error_reduce_minmax_(void const *, nk_size_t, nk_size_t, void *, nk_size_t *, void *, nk_size_t *);
476
+ extern nk_size_t nk_error_packed_size_(nk_size_t, nk_size_t);
477
+ extern void nk_error_pack_(void const *, nk_size_t, nk_size_t, nk_size_t, void *);
478
+ extern void nk_error_dots_(void const *, void const *, void *, nk_size_t, nk_size_t, nk_size_t, nk_size_t, nk_size_t);
479
+ extern void nk_error_dots_symmetric_(void const *, nk_size_t, nk_size_t, nk_size_t, void *, nk_size_t, nk_size_t,
480
+ nk_size_t);
481
+
482
+ // Dtype-specific kernel lookup functions: one nk_dispatch_<dtype>_find_ per element type plus nk_dispatch_cast_find_, all sharing the signature (nk_capability_t, nk_kernel_kind_t, nk_kernel_punned_t *, nk_capability_t *) and returning void. NOTE(review): the two pointer parameters look like outputs (the selected kernel and the capability it relies on) - confirm against the definitions
483
+ extern void nk_dispatch_f64c_find_(nk_capability_t, nk_kernel_kind_t, nk_kernel_punned_t *, nk_capability_t *);
484
+ extern void nk_dispatch_f32c_find_(nk_capability_t, nk_kernel_kind_t, nk_kernel_punned_t *, nk_capability_t *);
485
+ extern void nk_dispatch_bf16c_find_(nk_capability_t, nk_kernel_kind_t, nk_kernel_punned_t *, nk_capability_t *);
486
+ extern void nk_dispatch_f16c_find_(nk_capability_t, nk_kernel_kind_t, nk_kernel_punned_t *, nk_capability_t *);
487
+ extern void nk_dispatch_f64_find_(nk_capability_t, nk_kernel_kind_t, nk_kernel_punned_t *, nk_capability_t *);
488
+ extern void nk_dispatch_f32_find_(nk_capability_t, nk_kernel_kind_t, nk_kernel_punned_t *, nk_capability_t *);
489
+ extern void nk_dispatch_bf16_find_(nk_capability_t, nk_kernel_kind_t, nk_kernel_punned_t *, nk_capability_t *);
490
+ extern void nk_dispatch_f16_find_(nk_capability_t, nk_kernel_kind_t, nk_kernel_punned_t *, nk_capability_t *);
491
+ extern void nk_dispatch_e5m2_find_(nk_capability_t, nk_kernel_kind_t, nk_kernel_punned_t *, nk_capability_t *);
492
+ extern void nk_dispatch_e4m3_find_(nk_capability_t, nk_kernel_kind_t, nk_kernel_punned_t *, nk_capability_t *);
493
+ extern void nk_dispatch_e3m2_find_(nk_capability_t, nk_kernel_kind_t, nk_kernel_punned_t *, nk_capability_t *);
494
+ extern void nk_dispatch_e2m3_find_(nk_capability_t, nk_kernel_kind_t, nk_kernel_punned_t *, nk_capability_t *);
495
+ extern void nk_dispatch_i64_find_(nk_capability_t, nk_kernel_kind_t, nk_kernel_punned_t *, nk_capability_t *);
496
+ extern void nk_dispatch_i32_find_(nk_capability_t, nk_kernel_kind_t, nk_kernel_punned_t *, nk_capability_t *);
497
+ extern void nk_dispatch_i16_find_(nk_capability_t, nk_kernel_kind_t, nk_kernel_punned_t *, nk_capability_t *);
498
+ extern void nk_dispatch_i8_find_(nk_capability_t, nk_kernel_kind_t, nk_kernel_punned_t *, nk_capability_t *);
499
+ extern void nk_dispatch_i4_find_(nk_capability_t, nk_kernel_kind_t, nk_kernel_punned_t *, nk_capability_t *);
500
+ extern void nk_dispatch_u64_find_(nk_capability_t, nk_kernel_kind_t, nk_kernel_punned_t *, nk_capability_t *);
501
+ extern void nk_dispatch_u32_find_(nk_capability_t, nk_kernel_kind_t, nk_kernel_punned_t *, nk_capability_t *);
502
+ extern void nk_dispatch_u16_find_(nk_capability_t, nk_kernel_kind_t, nk_kernel_punned_t *, nk_capability_t *);
503
+ extern void nk_dispatch_u8_find_(nk_capability_t, nk_kernel_kind_t, nk_kernel_punned_t *, nk_capability_t *);
504
+ extern void nk_dispatch_u4_find_(nk_capability_t, nk_kernel_kind_t, nk_kernel_punned_t *, nk_capability_t *);
505
+ extern void nk_dispatch_u1_find_(nk_capability_t, nk_kernel_kind_t, nk_kernel_punned_t *, nk_capability_t *);
506
+ extern void nk_dispatch_cast_find_(nk_capability_t, nk_kernel_kind_t, nk_kernel_punned_t *, nk_capability_t *);
507
+
508
+ #ifdef __cplusplus
509
+ }
510
+ #endif
511
+
512
+ #endif // NK_DISPATCH_H