numkong 7.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (294)
  1. package/LICENSE +201 -0
  2. package/README.md +495 -0
  3. package/binding.gyp +540 -0
  4. package/c/dispatch.h +512 -0
  5. package/c/dispatch_bf16.c +389 -0
  6. package/c/dispatch_bf16c.c +52 -0
  7. package/c/dispatch_e2m3.c +263 -0
  8. package/c/dispatch_e3m2.c +243 -0
  9. package/c/dispatch_e4m3.c +276 -0
  10. package/c/dispatch_e5m2.c +272 -0
  11. package/c/dispatch_f16.c +376 -0
  12. package/c/dispatch_f16c.c +58 -0
  13. package/c/dispatch_f32.c +378 -0
  14. package/c/dispatch_f32c.c +99 -0
  15. package/c/dispatch_f64.c +296 -0
  16. package/c/dispatch_f64c.c +98 -0
  17. package/c/dispatch_i16.c +96 -0
  18. package/c/dispatch_i32.c +89 -0
  19. package/c/dispatch_i4.c +150 -0
  20. package/c/dispatch_i64.c +86 -0
  21. package/c/dispatch_i8.c +289 -0
  22. package/c/dispatch_other.c +330 -0
  23. package/c/dispatch_u1.c +148 -0
  24. package/c/dispatch_u16.c +124 -0
  25. package/c/dispatch_u32.c +118 -0
  26. package/c/dispatch_u4.c +150 -0
  27. package/c/dispatch_u64.c +102 -0
  28. package/c/dispatch_u8.c +303 -0
  29. package/c/numkong.c +950 -0
  30. package/include/README.md +573 -0
  31. package/include/module.modulemap +129 -0
  32. package/include/numkong/attention/sapphireamx.h +1361 -0
  33. package/include/numkong/attention/sme.h +2066 -0
  34. package/include/numkong/attention.h +49 -0
  35. package/include/numkong/capabilities.h +748 -0
  36. package/include/numkong/cast/README.md +262 -0
  37. package/include/numkong/cast/haswell.h +975 -0
  38. package/include/numkong/cast/icelake.h +470 -0
  39. package/include/numkong/cast/neon.h +1192 -0
  40. package/include/numkong/cast/rvv.h +1021 -0
  41. package/include/numkong/cast/sapphire.h +262 -0
  42. package/include/numkong/cast/serial.h +2262 -0
  43. package/include/numkong/cast/skylake.h +856 -0
  44. package/include/numkong/cast/v128relaxed.h +180 -0
  45. package/include/numkong/cast.h +230 -0
  46. package/include/numkong/curved/README.md +223 -0
  47. package/include/numkong/curved/genoa.h +182 -0
  48. package/include/numkong/curved/haswell.h +276 -0
  49. package/include/numkong/curved/neon.h +205 -0
  50. package/include/numkong/curved/neonbfdot.h +212 -0
  51. package/include/numkong/curved/neonhalf.h +212 -0
  52. package/include/numkong/curved/rvv.h +305 -0
  53. package/include/numkong/curved/serial.h +207 -0
  54. package/include/numkong/curved/skylake.h +457 -0
  55. package/include/numkong/curved/smef64.h +506 -0
  56. package/include/numkong/curved.h +517 -0
  57. package/include/numkong/curved.hpp +144 -0
  58. package/include/numkong/dot/README.md +425 -0
  59. package/include/numkong/dot/alder.h +563 -0
  60. package/include/numkong/dot/genoa.h +315 -0
  61. package/include/numkong/dot/haswell.h +1688 -0
  62. package/include/numkong/dot/icelake.h +883 -0
  63. package/include/numkong/dot/neon.h +818 -0
  64. package/include/numkong/dot/neonbfdot.h +244 -0
  65. package/include/numkong/dot/neonfhm.h +360 -0
  66. package/include/numkong/dot/neonhalf.h +198 -0
  67. package/include/numkong/dot/neonsdot.h +508 -0
  68. package/include/numkong/dot/rvv.h +714 -0
  69. package/include/numkong/dot/rvvbb.h +72 -0
  70. package/include/numkong/dot/rvvbf16.h +123 -0
  71. package/include/numkong/dot/rvvhalf.h +129 -0
  72. package/include/numkong/dot/sapphire.h +141 -0
  73. package/include/numkong/dot/serial.h +838 -0
  74. package/include/numkong/dot/sierra.h +405 -0
  75. package/include/numkong/dot/skylake.h +1084 -0
  76. package/include/numkong/dot/sve.h +379 -0
  77. package/include/numkong/dot/svebfdot.h +74 -0
  78. package/include/numkong/dot/svehalf.h +123 -0
  79. package/include/numkong/dot/v128relaxed.h +1258 -0
  80. package/include/numkong/dot.h +1070 -0
  81. package/include/numkong/dot.hpp +94 -0
  82. package/include/numkong/dots/README.md +496 -0
  83. package/include/numkong/dots/alder.h +114 -0
  84. package/include/numkong/dots/genoa.h +94 -0
  85. package/include/numkong/dots/haswell.h +295 -0
  86. package/include/numkong/dots/icelake.h +171 -0
  87. package/include/numkong/dots/neon.h +120 -0
  88. package/include/numkong/dots/neonbfdot.h +58 -0
  89. package/include/numkong/dots/neonfhm.h +94 -0
  90. package/include/numkong/dots/neonhalf.h +57 -0
  91. package/include/numkong/dots/neonsdot.h +108 -0
  92. package/include/numkong/dots/rvv.h +2486 -0
  93. package/include/numkong/dots/sapphireamx.h +3973 -0
  94. package/include/numkong/dots/serial.h +2844 -0
  95. package/include/numkong/dots/sierra.h +97 -0
  96. package/include/numkong/dots/skylake.h +196 -0
  97. package/include/numkong/dots/sme.h +5372 -0
  98. package/include/numkong/dots/smebi32.h +461 -0
  99. package/include/numkong/dots/smef64.h +1318 -0
  100. package/include/numkong/dots/smehalf.h +47 -0
  101. package/include/numkong/dots/v128relaxed.h +294 -0
  102. package/include/numkong/dots.h +2804 -0
  103. package/include/numkong/dots.hpp +639 -0
  104. package/include/numkong/each/README.md +469 -0
  105. package/include/numkong/each/haswell.h +1658 -0
  106. package/include/numkong/each/icelake.h +272 -0
  107. package/include/numkong/each/neon.h +1104 -0
  108. package/include/numkong/each/neonbfdot.h +212 -0
  109. package/include/numkong/each/neonhalf.h +410 -0
  110. package/include/numkong/each/rvv.h +1121 -0
  111. package/include/numkong/each/sapphire.h +477 -0
  112. package/include/numkong/each/serial.h +260 -0
  113. package/include/numkong/each/skylake.h +1562 -0
  114. package/include/numkong/each.h +2146 -0
  115. package/include/numkong/each.hpp +434 -0
  116. package/include/numkong/geospatial/README.md +147 -0
  117. package/include/numkong/geospatial/haswell.h +593 -0
  118. package/include/numkong/geospatial/neon.h +571 -0
  119. package/include/numkong/geospatial/rvv.h +701 -0
  120. package/include/numkong/geospatial/serial.h +309 -0
  121. package/include/numkong/geospatial/skylake.h +577 -0
  122. package/include/numkong/geospatial/v128relaxed.h +613 -0
  123. package/include/numkong/geospatial.h +453 -0
  124. package/include/numkong/geospatial.hpp +235 -0
  125. package/include/numkong/matrix.hpp +336 -0
  126. package/include/numkong/maxsim/README.md +187 -0
  127. package/include/numkong/maxsim/alder.h +511 -0
  128. package/include/numkong/maxsim/genoa.h +115 -0
  129. package/include/numkong/maxsim/haswell.h +553 -0
  130. package/include/numkong/maxsim/icelake.h +480 -0
  131. package/include/numkong/maxsim/neonsdot.h +394 -0
  132. package/include/numkong/maxsim/sapphireamx.h +877 -0
  133. package/include/numkong/maxsim/serial.h +490 -0
  134. package/include/numkong/maxsim/sme.h +929 -0
  135. package/include/numkong/maxsim/v128relaxed.h +280 -0
  136. package/include/numkong/maxsim.h +571 -0
  137. package/include/numkong/maxsim.hpp +133 -0
  138. package/include/numkong/mesh/README.md +227 -0
  139. package/include/numkong/mesh/haswell.h +2235 -0
  140. package/include/numkong/mesh/neon.h +1329 -0
  141. package/include/numkong/mesh/neonbfdot.h +842 -0
  142. package/include/numkong/mesh/neonhalf.h +616 -0
  143. package/include/numkong/mesh/rvv.h +916 -0
  144. package/include/numkong/mesh/serial.h +742 -0
  145. package/include/numkong/mesh/skylake.h +1135 -0
  146. package/include/numkong/mesh/v128relaxed.h +1052 -0
  147. package/include/numkong/mesh.h +652 -0
  148. package/include/numkong/mesh.hpp +762 -0
  149. package/include/numkong/numkong.h +78 -0
  150. package/include/numkong/numkong.hpp +57 -0
  151. package/include/numkong/probability/README.md +173 -0
  152. package/include/numkong/probability/haswell.h +267 -0
  153. package/include/numkong/probability/neon.h +225 -0
  154. package/include/numkong/probability/rvv.h +409 -0
  155. package/include/numkong/probability/serial.h +169 -0
  156. package/include/numkong/probability/skylake.h +324 -0
  157. package/include/numkong/probability.h +383 -0
  158. package/include/numkong/probability.hpp +120 -0
  159. package/include/numkong/random.h +50 -0
  160. package/include/numkong/random.hpp +285 -0
  161. package/include/numkong/reduce/README.md +547 -0
  162. package/include/numkong/reduce/alder.h +632 -0
  163. package/include/numkong/reduce/genoa.h +201 -0
  164. package/include/numkong/reduce/haswell.h +3783 -0
  165. package/include/numkong/reduce/icelake.h +549 -0
  166. package/include/numkong/reduce/neon.h +3841 -0
  167. package/include/numkong/reduce/neonbfdot.h +353 -0
  168. package/include/numkong/reduce/neonfhm.h +665 -0
  169. package/include/numkong/reduce/neonhalf.h +157 -0
  170. package/include/numkong/reduce/neonsdot.h +357 -0
  171. package/include/numkong/reduce/rvv.h +3407 -0
  172. package/include/numkong/reduce/serial.h +757 -0
  173. package/include/numkong/reduce/sierra.h +338 -0
  174. package/include/numkong/reduce/skylake.h +3792 -0
  175. package/include/numkong/reduce/v128relaxed.h +2302 -0
  176. package/include/numkong/reduce.h +1597 -0
  177. package/include/numkong/reduce.hpp +633 -0
  178. package/include/numkong/scalar/README.md +89 -0
  179. package/include/numkong/scalar/haswell.h +113 -0
  180. package/include/numkong/scalar/neon.h +122 -0
  181. package/include/numkong/scalar/neonhalf.h +70 -0
  182. package/include/numkong/scalar/rvv.h +211 -0
  183. package/include/numkong/scalar/sapphire.h +63 -0
  184. package/include/numkong/scalar/serial.h +332 -0
  185. package/include/numkong/scalar/v128relaxed.h +56 -0
  186. package/include/numkong/scalar.h +683 -0
  187. package/include/numkong/set/README.md +179 -0
  188. package/include/numkong/set/haswell.h +334 -0
  189. package/include/numkong/set/icelake.h +485 -0
  190. package/include/numkong/set/neon.h +364 -0
  191. package/include/numkong/set/rvv.h +226 -0
  192. package/include/numkong/set/rvvbb.h +117 -0
  193. package/include/numkong/set/serial.h +174 -0
  194. package/include/numkong/set/sve.h +185 -0
  195. package/include/numkong/set/v128relaxed.h +240 -0
  196. package/include/numkong/set.h +457 -0
  197. package/include/numkong/set.hpp +114 -0
  198. package/include/numkong/sets/README.md +149 -0
  199. package/include/numkong/sets/haswell.h +63 -0
  200. package/include/numkong/sets/icelake.h +66 -0
  201. package/include/numkong/sets/neon.h +61 -0
  202. package/include/numkong/sets/serial.h +43 -0
  203. package/include/numkong/sets/smebi32.h +1099 -0
  204. package/include/numkong/sets/v128relaxed.h +58 -0
  205. package/include/numkong/sets.h +339 -0
  206. package/include/numkong/sparse/README.md +156 -0
  207. package/include/numkong/sparse/icelake.h +463 -0
  208. package/include/numkong/sparse/neon.h +288 -0
  209. package/include/numkong/sparse/serial.h +117 -0
  210. package/include/numkong/sparse/sve2.h +507 -0
  211. package/include/numkong/sparse/turin.h +322 -0
  212. package/include/numkong/sparse.h +363 -0
  213. package/include/numkong/sparse.hpp +113 -0
  214. package/include/numkong/spatial/README.md +435 -0
  215. package/include/numkong/spatial/alder.h +607 -0
  216. package/include/numkong/spatial/genoa.h +290 -0
  217. package/include/numkong/spatial/haswell.h +960 -0
  218. package/include/numkong/spatial/icelake.h +586 -0
  219. package/include/numkong/spatial/neon.h +773 -0
  220. package/include/numkong/spatial/neonbfdot.h +165 -0
  221. package/include/numkong/spatial/neonhalf.h +118 -0
  222. package/include/numkong/spatial/neonsdot.h +261 -0
  223. package/include/numkong/spatial/rvv.h +984 -0
  224. package/include/numkong/spatial/rvvbf16.h +123 -0
  225. package/include/numkong/spatial/rvvhalf.h +117 -0
  226. package/include/numkong/spatial/sapphire.h +343 -0
  227. package/include/numkong/spatial/serial.h +346 -0
  228. package/include/numkong/spatial/sierra.h +323 -0
  229. package/include/numkong/spatial/skylake.h +606 -0
  230. package/include/numkong/spatial/sve.h +224 -0
  231. package/include/numkong/spatial/svebfdot.h +122 -0
  232. package/include/numkong/spatial/svehalf.h +109 -0
  233. package/include/numkong/spatial/v128relaxed.h +717 -0
  234. package/include/numkong/spatial.h +1425 -0
  235. package/include/numkong/spatial.hpp +183 -0
  236. package/include/numkong/spatials/README.md +580 -0
  237. package/include/numkong/spatials/alder.h +94 -0
  238. package/include/numkong/spatials/genoa.h +94 -0
  239. package/include/numkong/spatials/haswell.h +219 -0
  240. package/include/numkong/spatials/icelake.h +113 -0
  241. package/include/numkong/spatials/neon.h +109 -0
  242. package/include/numkong/spatials/neonbfdot.h +60 -0
  243. package/include/numkong/spatials/neonfhm.h +92 -0
  244. package/include/numkong/spatials/neonhalf.h +58 -0
  245. package/include/numkong/spatials/neonsdot.h +109 -0
  246. package/include/numkong/spatials/rvv.h +1960 -0
  247. package/include/numkong/spatials/sapphireamx.h +1149 -0
  248. package/include/numkong/spatials/serial.h +226 -0
  249. package/include/numkong/spatials/sierra.h +96 -0
  250. package/include/numkong/spatials/skylake.h +184 -0
  251. package/include/numkong/spatials/sme.h +1901 -0
  252. package/include/numkong/spatials/smef64.h +465 -0
  253. package/include/numkong/spatials/v128relaxed.h +240 -0
  254. package/include/numkong/spatials.h +3021 -0
  255. package/include/numkong/spatials.hpp +508 -0
  256. package/include/numkong/tensor.hpp +1592 -0
  257. package/include/numkong/trigonometry/README.md +184 -0
  258. package/include/numkong/trigonometry/haswell.h +652 -0
  259. package/include/numkong/trigonometry/neon.h +639 -0
  260. package/include/numkong/trigonometry/rvv.h +699 -0
  261. package/include/numkong/trigonometry/serial.h +703 -0
  262. package/include/numkong/trigonometry/skylake.h +721 -0
  263. package/include/numkong/trigonometry/v128relaxed.h +666 -0
  264. package/include/numkong/trigonometry.h +467 -0
  265. package/include/numkong/trigonometry.hpp +166 -0
  266. package/include/numkong/types.h +1384 -0
  267. package/include/numkong/types.hpp +5603 -0
  268. package/include/numkong/vector.hpp +698 -0
  269. package/javascript/README.md +246 -0
  270. package/javascript/dist/cjs/numkong-wasm.d.ts +166 -0
  271. package/javascript/dist/cjs/numkong-wasm.js +617 -0
  272. package/javascript/dist/cjs/numkong.d.ts +343 -0
  273. package/javascript/dist/cjs/numkong.js +523 -0
  274. package/javascript/dist/cjs/package.json +3 -0
  275. package/javascript/dist/cjs/types.d.ts +284 -0
  276. package/javascript/dist/cjs/types.js +653 -0
  277. package/javascript/dist/esm/numkong-wasm.d.ts +166 -0
  278. package/javascript/dist/esm/numkong-wasm.js +595 -0
  279. package/javascript/dist/esm/numkong.d.ts +343 -0
  280. package/javascript/dist/esm/numkong.js +452 -0
  281. package/javascript/dist/esm/package.json +3 -0
  282. package/javascript/dist/esm/types.d.ts +284 -0
  283. package/javascript/dist/esm/types.js +630 -0
  284. package/javascript/dist-package-cjs.json +3 -0
  285. package/javascript/dist-package-esm.json +3 -0
  286. package/javascript/node-gyp-build.d.ts +1 -0
  287. package/javascript/numkong-wasm.ts +756 -0
  288. package/javascript/numkong.c +689 -0
  289. package/javascript/numkong.ts +575 -0
  290. package/javascript/tsconfig-base.json +39 -0
  291. package/javascript/tsconfig-cjs.json +8 -0
  292. package/javascript/tsconfig-esm.json +8 -0
  293. package/javascript/types.ts +674 -0
  294. package/package.json +87 -0
@@ -0,0 +1,376 @@
1
+ /**
2
+ * @brief Dispatch Initialization for F16 Data Types.
3
+ * @file c/dispatch_f16.c
4
+ * @author Ash Vardanian
5
+ * @date February 3, 2026
6
+ */
7
+ #include "dispatch.h"
8
+
9
+ void nk_dispatch_f16_find_(nk_capability_t v, nk_kernel_kind_t k, nk_kernel_punned_t *m, nk_capability_t *c) {
10
+ typedef nk_kernel_punned_t m_t;
11
+ #if NK_TARGET_V128RELAXED
12
+ if (v & nk_cap_v128relaxed_k) switch (k) {
13
+ case nk_kernel_dot_k: *m = (m_t)&nk_dot_f16_v128relaxed, *c = nk_cap_v128relaxed_k; return;
14
+ case nk_kernel_angular_k: *m = (m_t)&nk_angular_f16_v128relaxed, *c = nk_cap_v128relaxed_k; return;
15
+ case nk_kernel_sqeuclidean_k: *m = (m_t)&nk_sqeuclidean_f16_v128relaxed, *c = nk_cap_v128relaxed_k; return;
16
+ case nk_kernel_euclidean_k: *m = (m_t)&nk_euclidean_f16_v128relaxed, *c = nk_cap_v128relaxed_k; return;
17
+ case nk_kernel_reduce_moments_k:
18
+ *m = (m_t)&nk_reduce_moments_f16_v128relaxed, *c = nk_cap_v128relaxed_k;
19
+ return;
20
+ case nk_kernel_reduce_minmax_k: *m = (m_t)&nk_reduce_minmax_f16_v128relaxed, *c = nk_cap_v128relaxed_k; return;
21
+ case nk_kernel_maxsim_packed_size_k:
22
+ *m = (m_t)&nk_maxsim_packed_size_f16_v128relaxed, *c = nk_cap_v128relaxed_k;
23
+ return;
24
+ case nk_kernel_maxsim_pack_k: *m = (m_t)&nk_maxsim_pack_f16_v128relaxed, *c = nk_cap_v128relaxed_k; return;
25
+ case nk_kernel_maxsim_packed_k: *m = (m_t)&nk_maxsim_packed_f16_v128relaxed, *c = nk_cap_v128relaxed_k; return;
26
+ case nk_kernel_dots_packed_size_k:
27
+ *m = (m_t)&nk_dots_packed_size_f16_v128relaxed, *c = nk_cap_v128relaxed_k;
28
+ return;
29
+ case nk_kernel_dots_pack_k: *m = (m_t)&nk_dots_pack_f16_v128relaxed, *c = nk_cap_v128relaxed_k; return;
30
+ case nk_kernel_dots_packed_k: *m = (m_t)&nk_dots_packed_f16_v128relaxed, *c = nk_cap_v128relaxed_k; return;
31
+ case nk_kernel_dots_symmetric_k:
32
+ *m = (m_t)&nk_dots_symmetric_f16_v128relaxed, *c = nk_cap_v128relaxed_k;
33
+ return;
34
+ case nk_kernel_angulars_packed_k:
35
+ *m = (m_t)&nk_angulars_packed_f16_v128relaxed, *c = nk_cap_v128relaxed_k;
36
+ return;
37
+ case nk_kernel_angulars_symmetric_k:
38
+ *m = (m_t)&nk_angulars_symmetric_f16_v128relaxed, *c = nk_cap_v128relaxed_k;
39
+ return;
40
+ case nk_kernel_euclideans_packed_k:
41
+ *m = (m_t)&nk_euclideans_packed_f16_v128relaxed, *c = nk_cap_v128relaxed_k;
42
+ return;
43
+ case nk_kernel_euclideans_symmetric_k:
44
+ *m = (m_t)&nk_euclideans_symmetric_f16_v128relaxed, *c = nk_cap_v128relaxed_k;
45
+ return;
46
+ default: break;
47
+ }
48
+ #endif
49
+ #if NK_TARGET_SME
50
+ if (v & nk_cap_sme_k) switch (k) {
51
+ case nk_kernel_dots_packed_size_k: *m = (m_t)&nk_dots_packed_size_f16_sme, *c = nk_cap_sme_k; return;
52
+ case nk_kernel_dots_pack_k: *m = (m_t)&nk_dots_pack_f16_sme, *c = nk_cap_sme_k; return;
53
+ case nk_kernel_dots_packed_k: *m = (m_t)&nk_dots_packed_f16_sme, *c = nk_cap_sme_k; return;
54
+ case nk_kernel_dots_symmetric_k: *m = (m_t)&nk_dots_symmetric_f16_sme, *c = nk_cap_sme_k; return;
55
+ case nk_kernel_angulars_packed_k: *m = (m_t)&nk_angulars_packed_f16_sme, *c = nk_cap_sme_k; return;
56
+ case nk_kernel_angulars_symmetric_k: *m = (m_t)&nk_angulars_symmetric_f16_sme, *c = nk_cap_sme_k; return;
57
+ case nk_kernel_euclideans_packed_k: *m = (m_t)&nk_euclideans_packed_f16_sme, *c = nk_cap_sme_k; return;
58
+ case nk_kernel_euclideans_symmetric_k: *m = (m_t)&nk_euclideans_symmetric_f16_sme, *c = nk_cap_sme_k; return;
59
+ case nk_kernel_maxsim_packed_size_k: *m = (m_t)&nk_maxsim_packed_size_f16_sme, *c = nk_cap_sme_k; return;
60
+ case nk_kernel_maxsim_pack_k: *m = (m_t)&nk_maxsim_pack_f16_sme, *c = nk_cap_sme_k; return;
61
+ case nk_kernel_maxsim_packed_k: *m = (m_t)&nk_maxsim_packed_f16_sme, *c = nk_cap_sme_k; return;
62
+ default: break;
63
+ }
64
+ #endif
65
+ #if NK_TARGET_SVEHALF
66
+ if (v & nk_cap_svehalf_k) switch (k) {
67
+ case nk_kernel_dot_k: *m = (m_t)&nk_dot_f16_svehalf, *c = nk_cap_svehalf_k; return;
68
+ case nk_kernel_angular_k: *m = (m_t)&nk_angular_f16_svehalf, *c = nk_cap_svehalf_k; return;
69
+ case nk_kernel_sqeuclidean_k: *m = (m_t)&nk_sqeuclidean_f16_svehalf, *c = nk_cap_svehalf_k; return;
70
+ case nk_kernel_euclidean_k: *m = (m_t)&nk_euclidean_f16_svehalf, *c = nk_cap_svehalf_k; return;
71
+ default: break;
72
+ }
73
+ #endif
74
+ #if NK_TARGET_NEONFHM
75
+ if (v & nk_cap_neonfhm_k) switch (k) {
76
+ case nk_kernel_dot_k: *m = (m_t)&nk_dot_f16_neonfhm, *c = nk_cap_neonfhm_k; return;
77
+ case nk_kernel_dots_packed_size_k: *m = (m_t)&nk_dots_packed_size_f16_neonfhm, *c = nk_cap_neonfhm_k; return;
78
+ case nk_kernel_dots_pack_k: *m = (m_t)&nk_dots_pack_f16_neonfhm, *c = nk_cap_neonfhm_k; return;
79
+ case nk_kernel_dots_packed_k: *m = (m_t)&nk_dots_packed_f16_neonfhm, *c = nk_cap_neonfhm_k; return;
80
+ case nk_kernel_dots_symmetric_k: *m = (m_t)&nk_dots_symmetric_f16_neonfhm, *c = nk_cap_neonfhm_k; return;
81
+ case nk_kernel_angulars_packed_k: *m = (m_t)&nk_angulars_packed_f16_neonfhm, *c = nk_cap_neonfhm_k; return;
82
+ case nk_kernel_angulars_symmetric_k:
83
+ *m = (m_t)&nk_angulars_symmetric_f16_neonfhm, *c = nk_cap_neonfhm_k;
84
+ return;
85
+ case nk_kernel_euclideans_packed_k: *m = (m_t)&nk_euclideans_packed_f16_neonfhm, *c = nk_cap_neonfhm_k; return;
86
+ case nk_kernel_euclideans_symmetric_k:
87
+ *m = (m_t)&nk_euclideans_symmetric_f16_neonfhm, *c = nk_cap_neonfhm_k;
88
+ return;
89
+ default: break;
90
+ }
91
+ #endif
92
+ #if NK_TARGET_NEONHALF
93
+ if (v & nk_cap_neonhalf_k) switch (k) {
94
+ case nk_kernel_dot_k: *m = (m_t)&nk_dot_f16_neonhalf, *c = nk_cap_neonhalf_k; return;
95
+ case nk_kernel_angular_k: *m = (m_t)&nk_angular_f16_neonhalf, *c = nk_cap_neonhalf_k; return;
96
+ case nk_kernel_sqeuclidean_k: *m = (m_t)&nk_sqeuclidean_f16_neonhalf, *c = nk_cap_neonhalf_k; return;
97
+ case nk_kernel_euclidean_k: *m = (m_t)&nk_euclidean_f16_neonhalf, *c = nk_cap_neonhalf_k; return;
98
+ case nk_kernel_jsd_k: *m = (m_t)&nk_jsd_f16_neonhalf, *c = nk_cap_neonhalf_k; return;
99
+ case nk_kernel_kld_k: *m = (m_t)&nk_kld_f16_neonhalf, *c = nk_cap_neonhalf_k; return;
100
+ case nk_kernel_bilinear_k: *m = (m_t)&nk_bilinear_f16_neonhalf, *c = nk_cap_neonhalf_k; return;
101
+ case nk_kernel_mahalanobis_k: *m = (m_t)&nk_mahalanobis_f16_neonhalf, *c = nk_cap_neonhalf_k; return;
102
+ case nk_kernel_each_fma_k: *m = (m_t)&nk_each_fma_f16_neonhalf, *c = nk_cap_neonhalf_k; return;
103
+ case nk_kernel_each_blend_k: *m = (m_t)&nk_each_blend_f16_neonhalf, *c = nk_cap_neonhalf_k; return;
104
+ case nk_kernel_each_scale_k: *m = (m_t)&nk_each_scale_f16_neonhalf, *c = nk_cap_neonhalf_k; return;
105
+ case nk_kernel_each_sum_k: *m = (m_t)&nk_each_sum_f16_neonhalf, *c = nk_cap_neonhalf_k; return;
106
+ case nk_kernel_kabsch_k: *m = (m_t)&nk_kabsch_f16_neonhalf, *c = nk_cap_neonhalf_k; return;
107
+ case nk_kernel_rmsd_k: *m = (m_t)&nk_rmsd_f16_neonhalf, *c = nk_cap_neonhalf_k; return;
108
+ case nk_kernel_umeyama_k: *m = (m_t)&nk_umeyama_f16_neonhalf, *c = nk_cap_neonhalf_k; return;
109
+ case nk_kernel_reduce_moments_k: *m = (m_t)&nk_reduce_moments_f16_neonhalf, *c = nk_cap_neonhalf_k; return;
110
+ case nk_kernel_dots_packed_size_k: *m = (m_t)&nk_dots_packed_size_f16_neonhalf, *c = nk_cap_neonhalf_k; return;
111
+ case nk_kernel_dots_pack_k: *m = (m_t)&nk_dots_pack_f16_neonhalf, *c = nk_cap_neonhalf_k; return;
112
+ case nk_kernel_dots_packed_k: *m = (m_t)&nk_dots_packed_f16_neonhalf, *c = nk_cap_neonhalf_k; return;
113
+ case nk_kernel_dots_symmetric_k: *m = (m_t)&nk_dots_symmetric_f16_neonhalf, *c = nk_cap_neonhalf_k; return;
114
+ case nk_kernel_angulars_packed_k: *m = (m_t)&nk_angulars_packed_f16_neonhalf, *c = nk_cap_neonhalf_k; return;
115
+ case nk_kernel_angulars_symmetric_k:
116
+ *m = (m_t)&nk_angulars_symmetric_f16_neonhalf, *c = nk_cap_neonhalf_k;
117
+ return;
118
+ case nk_kernel_euclideans_packed_k:
119
+ *m = (m_t)&nk_euclideans_packed_f16_neonhalf, *c = nk_cap_neonhalf_k;
120
+ return;
121
+ case nk_kernel_euclideans_symmetric_k:
122
+ *m = (m_t)&nk_euclideans_symmetric_f16_neonhalf, *c = nk_cap_neonhalf_k;
123
+ return;
124
+ default: break;
125
+ }
126
+ #endif
127
+ #if NK_TARGET_NEON
128
+ if (v & nk_cap_neon_k) switch (k) {
129
+ case nk_kernel_dot_k: *m = (m_t)&nk_dot_f16_neon, *c = nk_cap_neon_k; return;
130
+ case nk_kernel_dots_packed_size_k: *m = (m_t)&nk_dots_packed_size_f16_neon, *c = nk_cap_neon_k; return;
131
+ case nk_kernel_dots_pack_k: *m = (m_t)&nk_dots_pack_f16_neon, *c = nk_cap_neon_k; return;
132
+ case nk_kernel_dots_packed_k: *m = (m_t)&nk_dots_packed_f16_neon, *c = nk_cap_neon_k; return;
133
+ case nk_kernel_dots_symmetric_k: *m = (m_t)&nk_dots_symmetric_f16_neon, *c = nk_cap_neon_k; return;
134
+ case nk_kernel_angulars_packed_k: *m = (m_t)&nk_angulars_packed_f16_neon, *c = nk_cap_neon_k; return;
135
+ case nk_kernel_angulars_symmetric_k: *m = (m_t)&nk_angulars_symmetric_f16_neon, *c = nk_cap_neon_k; return;
136
+ case nk_kernel_euclideans_packed_k: *m = (m_t)&nk_euclideans_packed_f16_neon, *c = nk_cap_neon_k; return;
137
+ case nk_kernel_euclideans_symmetric_k: *m = (m_t)&nk_euclideans_symmetric_f16_neon, *c = nk_cap_neon_k; return;
138
+ default: break;
139
+ }
140
+ #endif
141
+ #if NK_TARGET_NEONSDOT
142
+ if (v & nk_cap_neonsdot_k) switch (k) {
143
+ case nk_kernel_maxsim_packed_size_k:
144
+ *m = (m_t)&nk_maxsim_packed_size_f16_neonsdot, *c = nk_cap_neonsdot_k;
145
+ return;
146
+ case nk_kernel_maxsim_pack_k: *m = (m_t)&nk_maxsim_pack_f16_neonsdot, *c = nk_cap_neonsdot_k; return;
147
+ case nk_kernel_maxsim_packed_k: *m = (m_t)&nk_maxsim_packed_f16_neonsdot, *c = nk_cap_neonsdot_k; return;
148
+ default: break;
149
+ }
150
+ #endif
151
+ #if NK_TARGET_SAPPHIREAMX
152
+ if (v & nk_cap_sapphireamx_k) switch (k) {
153
+ case nk_kernel_maxsim_packed_size_k:
154
+ *m = (m_t)&nk_maxsim_packed_size_f16_sapphireamx, *c = nk_cap_sapphireamx_k;
155
+ return;
156
+ case nk_kernel_maxsim_pack_k: *m = (m_t)&nk_maxsim_pack_f16_sapphireamx, *c = nk_cap_sapphireamx_k; return;
157
+ case nk_kernel_maxsim_packed_k: *m = (m_t)&nk_maxsim_packed_f16_sapphireamx, *c = nk_cap_sapphireamx_k; return;
158
+ default: break;
159
+ }
160
+ #endif
161
+ #if NK_TARGET_SAPPHIRE
162
+ if (v & nk_cap_sapphire_k) switch (k) {
163
+ case nk_kernel_each_sum_k: *m = (m_t)&nk_each_sum_f16_sapphire, *c = nk_cap_sapphire_k; return;
164
+ default: break;
165
+ }
166
+ #endif
167
+ #if NK_TARGET_ICELAKE
168
+ if (v & nk_cap_icelake_k) switch (k) {
169
+ case nk_kernel_maxsim_packed_size_k:
170
+ *m = (m_t)&nk_maxsim_packed_size_f16_icelake, *c = nk_cap_icelake_k;
171
+ return;
172
+ case nk_kernel_maxsim_pack_k: *m = (m_t)&nk_maxsim_pack_f16_icelake, *c = nk_cap_icelake_k; return;
173
+ case nk_kernel_maxsim_packed_k: *m = (m_t)&nk_maxsim_packed_f16_icelake, *c = nk_cap_icelake_k; return;
174
+ default: break;
175
+ }
176
+ #endif
177
+ #if NK_TARGET_SKYLAKE
178
+ if (v & nk_cap_skylake_k) switch (k) {
179
+ case nk_kernel_dot_k: *m = (m_t)&nk_dot_f16_skylake, *c = nk_cap_skylake_k; return;
180
+ case nk_kernel_euclidean_k: *m = (m_t)&nk_euclidean_f16_skylake, *c = nk_cap_skylake_k; return;
181
+ case nk_kernel_sqeuclidean_k: *m = (m_t)&nk_sqeuclidean_f16_skylake, *c = nk_cap_skylake_k; return;
182
+ case nk_kernel_angular_k: *m = (m_t)&nk_angular_f16_skylake, *c = nk_cap_skylake_k; return;
183
+ case nk_kernel_dots_packed_size_k: *m = (m_t)&nk_dots_packed_size_f16_skylake, *c = nk_cap_skylake_k; return;
184
+ case nk_kernel_dots_pack_k: *m = (m_t)&nk_dots_pack_f16_skylake, *c = nk_cap_skylake_k; return;
185
+ case nk_kernel_dots_packed_k: *m = (m_t)&nk_dots_packed_f16_skylake, *c = nk_cap_skylake_k; return;
186
+ case nk_kernel_dots_symmetric_k: *m = (m_t)&nk_dots_symmetric_f16_skylake, *c = nk_cap_skylake_k; return;
187
+ case nk_kernel_angulars_packed_k: *m = (m_t)&nk_angulars_packed_f16_skylake, *c = nk_cap_skylake_k; return;
188
+ case nk_kernel_angulars_symmetric_k:
189
+ *m = (m_t)&nk_angulars_symmetric_f16_skylake, *c = nk_cap_skylake_k;
190
+ return;
191
+ case nk_kernel_euclideans_packed_k: *m = (m_t)&nk_euclideans_packed_f16_skylake, *c = nk_cap_skylake_k; return;
192
+ case nk_kernel_euclideans_symmetric_k:
193
+ *m = (m_t)&nk_euclideans_symmetric_f16_skylake, *c = nk_cap_skylake_k;
194
+ return;
195
+ case nk_kernel_reduce_moments_k: *m = (m_t)&nk_reduce_moments_f16_skylake, *c = nk_cap_skylake_k; return;
196
+ case nk_kernel_reduce_minmax_k: *m = (m_t)&nk_reduce_minmax_f16_skylake, *c = nk_cap_skylake_k; return;
197
+ case nk_kernel_kld_k: *m = (m_t)&nk_kld_f16_skylake, *c = nk_cap_skylake_k; return;
198
+ case nk_kernel_jsd_k: *m = (m_t)&nk_jsd_f16_skylake, *c = nk_cap_skylake_k; return;
199
+ case nk_kernel_each_sin_k: *m = (m_t)&nk_each_sin_f16_skylake, *c = nk_cap_skylake_k; return;
200
+ case nk_kernel_each_cos_k: *m = (m_t)&nk_each_cos_f16_skylake, *c = nk_cap_skylake_k; return;
201
+ case nk_kernel_each_atan_k: *m = (m_t)&nk_each_atan_f16_skylake, *c = nk_cap_skylake_k; return;
202
+ case nk_kernel_each_blend_k: *m = (m_t)&nk_each_blend_f16_skylake, *c = nk_cap_skylake_k; return;
203
+ case nk_kernel_each_fma_k: *m = (m_t)&nk_each_fma_f16_skylake, *c = nk_cap_skylake_k; return;
204
+ case nk_kernel_each_scale_k: *m = (m_t)&nk_each_scale_f16_skylake, *c = nk_cap_skylake_k; return;
205
+ default: break;
206
+ }
207
+ #endif
208
+ #if NK_TARGET_ALDER
209
+ if (v & nk_cap_alder_k) switch (k) {
210
+ case nk_kernel_maxsim_packed_size_k: *m = (m_t)&nk_maxsim_packed_size_f16_alder, *c = nk_cap_alder_k; return;
211
+ case nk_kernel_maxsim_pack_k: *m = (m_t)&nk_maxsim_pack_f16_alder, *c = nk_cap_alder_k; return;
212
+ case nk_kernel_maxsim_packed_k: *m = (m_t)&nk_maxsim_packed_f16_alder, *c = nk_cap_alder_k; return;
213
+ default: break;
214
+ }
215
+ #endif
216
+ #if NK_TARGET_HASWELL
217
+ if (v & nk_cap_haswell_k) switch (k) {
218
+ case nk_kernel_dot_k: *m = (m_t)&nk_dot_f16_haswell, *c = nk_cap_haswell_k; return;
219
+ case nk_kernel_angular_k: *m = (m_t)&nk_angular_f16_haswell, *c = nk_cap_haswell_k; return;
220
+ case nk_kernel_sqeuclidean_k: *m = (m_t)&nk_sqeuclidean_f16_haswell, *c = nk_cap_haswell_k; return;
221
+ case nk_kernel_euclidean_k: *m = (m_t)&nk_euclidean_f16_haswell, *c = nk_cap_haswell_k; return;
222
+ case nk_kernel_jsd_k: *m = (m_t)&nk_jsd_f16_haswell, *c = nk_cap_haswell_k; return;
223
+ case nk_kernel_kld_k: *m = (m_t)&nk_kld_f16_haswell, *c = nk_cap_haswell_k; return;
224
+ case nk_kernel_bilinear_k: *m = (m_t)&nk_bilinear_f16_haswell, *c = nk_cap_haswell_k; return;
225
+ case nk_kernel_mahalanobis_k: *m = (m_t)&nk_mahalanobis_f16_haswell, *c = nk_cap_haswell_k; return;
226
+ case nk_kernel_each_fma_k: *m = (m_t)&nk_each_fma_f16_haswell, *c = nk_cap_haswell_k; return;
227
+ case nk_kernel_each_blend_k: *m = (m_t)&nk_each_blend_f16_haswell, *c = nk_cap_haswell_k; return;
228
+ case nk_kernel_each_scale_k: *m = (m_t)&nk_each_scale_f16_haswell, *c = nk_cap_haswell_k; return;
229
+ case nk_kernel_each_sum_k: *m = (m_t)&nk_each_sum_f16_haswell, *c = nk_cap_haswell_k; return;
230
+ case nk_kernel_dots_packed_size_k: *m = (m_t)&nk_dots_packed_size_f16_haswell, *c = nk_cap_haswell_k; return;
231
+ case nk_kernel_dots_pack_k: *m = (m_t)&nk_dots_pack_f16_haswell, *c = nk_cap_haswell_k; return;
232
+ case nk_kernel_dots_packed_k: *m = (m_t)&nk_dots_packed_f16_haswell, *c = nk_cap_haswell_k; return;
233
+ case nk_kernel_dots_symmetric_k: *m = (m_t)&nk_dots_symmetric_f16_haswell, *c = nk_cap_haswell_k; return;
234
+ case nk_kernel_angulars_packed_k: *m = (m_t)&nk_angulars_packed_f16_haswell, *c = nk_cap_haswell_k; return;
235
+ case nk_kernel_angulars_symmetric_k:
236
+ *m = (m_t)&nk_angulars_symmetric_f16_haswell, *c = nk_cap_haswell_k;
237
+ return;
238
+ case nk_kernel_euclideans_packed_k: *m = (m_t)&nk_euclideans_packed_f16_haswell, *c = nk_cap_haswell_k; return;
239
+ case nk_kernel_euclideans_symmetric_k:
240
+ *m = (m_t)&nk_euclideans_symmetric_f16_haswell, *c = nk_cap_haswell_k;
241
+ return;
242
+ case nk_kernel_reduce_moments_k: *m = (m_t)&nk_reduce_moments_f16_haswell, *c = nk_cap_haswell_k; return;
243
+ case nk_kernel_reduce_minmax_k: *m = (m_t)&nk_reduce_minmax_f16_haswell, *c = nk_cap_haswell_k; return;
244
+ case nk_kernel_maxsim_packed_size_k:
245
+ *m = (m_t)&nk_maxsim_packed_size_f16_haswell, *c = nk_cap_haswell_k;
246
+ return;
247
+ case nk_kernel_maxsim_pack_k: *m = (m_t)&nk_maxsim_pack_f16_haswell, *c = nk_cap_haswell_k; return;
248
+ case nk_kernel_maxsim_packed_k: *m = (m_t)&nk_maxsim_packed_f16_haswell, *c = nk_cap_haswell_k; return;
249
+ case nk_kernel_kabsch_k: *m = (m_t)&nk_kabsch_f16_haswell, *c = nk_cap_haswell_k; return;
250
+ case nk_kernel_rmsd_k: *m = (m_t)&nk_rmsd_f16_haswell, *c = nk_cap_haswell_k; return;
251
+ case nk_kernel_umeyama_k: *m = (m_t)&nk_umeyama_f16_haswell, *c = nk_cap_haswell_k; return;
252
+ default: break;
253
+ }
254
+ #endif
255
+ #if NK_TARGET_RVVHALF
256
+ if (v & nk_cap_rvvhalf_k) switch (k) {
257
+ case nk_kernel_dot_k: *m = (m_t)&nk_dot_f16_rvvhalf, *c = nk_cap_rvvhalf_k; return;
258
+ case nk_kernel_angular_k: *m = (m_t)&nk_angular_f16_rvvhalf, *c = nk_cap_rvvhalf_k; return;
259
+ case nk_kernel_sqeuclidean_k: *m = (m_t)&nk_sqeuclidean_f16_rvvhalf, *c = nk_cap_rvvhalf_k; return;
260
+ case nk_kernel_euclidean_k: *m = (m_t)&nk_euclidean_f16_rvvhalf, *c = nk_cap_rvvhalf_k; return;
261
+ default: break;
262
+ }
263
+ #endif
264
+ #if NK_TARGET_RVV
265
+ if (v & nk_cap_rvv_k) switch (k) {
266
+ case nk_kernel_dot_k: *m = (m_t)&nk_dot_f16_rvv, *c = nk_cap_rvv_k; return;
267
+ case nk_kernel_sqeuclidean_k: *m = (m_t)&nk_sqeuclidean_f16_rvv, *c = nk_cap_rvv_k; return;
268
+ case nk_kernel_euclidean_k: *m = (m_t)&nk_euclidean_f16_rvv, *c = nk_cap_rvv_k; return;
269
+ case nk_kernel_angular_k: *m = (m_t)&nk_angular_f16_rvv, *c = nk_cap_rvv_k; return;
270
+ case nk_kernel_reduce_moments_k: *m = (m_t)&nk_reduce_moments_f16_rvv, *c = nk_cap_rvv_k; return;
271
+ case nk_kernel_reduce_minmax_k: *m = (m_t)&nk_reduce_minmax_f16_rvv, *c = nk_cap_rvv_k; return;
272
+ case nk_kernel_kld_k: *m = (m_t)&nk_kld_f16_rvv, *c = nk_cap_rvv_k; return;
273
+ case nk_kernel_jsd_k: *m = (m_t)&nk_jsd_f16_rvv, *c = nk_cap_rvv_k; return;
274
+ case nk_kernel_dots_packed_size_k: *m = (m_t)&nk_dots_packed_size_f16_rvv, *c = nk_cap_rvv_k; return;
275
+ case nk_kernel_dots_pack_k: *m = (m_t)&nk_dots_pack_f16_rvv, *c = nk_cap_rvv_k; return;
276
+ case nk_kernel_dots_packed_k: *m = (m_t)&nk_dots_packed_f16_rvv, *c = nk_cap_rvv_k; return;
277
+ case nk_kernel_dots_symmetric_k: *m = (m_t)&nk_dots_symmetric_f16_rvv, *c = nk_cap_rvv_k; return;
278
+ case nk_kernel_angulars_packed_k: *m = (m_t)&nk_angulars_packed_f16_rvv, *c = nk_cap_rvv_k; return;
279
+ case nk_kernel_angulars_symmetric_k: *m = (m_t)&nk_angulars_symmetric_f16_rvv, *c = nk_cap_rvv_k; return;
280
+ case nk_kernel_euclideans_packed_k: *m = (m_t)&nk_euclideans_packed_f16_rvv, *c = nk_cap_rvv_k; return;
281
+ case nk_kernel_euclideans_symmetric_k: *m = (m_t)&nk_euclideans_symmetric_f16_rvv, *c = nk_cap_rvv_k; return;
282
+ case nk_kernel_each_sin_k: *m = (m_t)&nk_each_sin_f16_rvv, *c = nk_cap_rvv_k; return;
283
+ case nk_kernel_each_cos_k: *m = (m_t)&nk_each_cos_f16_rvv, *c = nk_cap_rvv_k; return;
284
+ case nk_kernel_each_atan_k: *m = (m_t)&nk_each_atan_f16_rvv, *c = nk_cap_rvv_k; return;
285
+ case nk_kernel_each_blend_k: *m = (m_t)&nk_each_blend_f16_rvv, *c = nk_cap_rvv_k; return;
286
+ case nk_kernel_each_fma_k: *m = (m_t)&nk_each_fma_f16_rvv, *c = nk_cap_rvv_k; return;
287
+ case nk_kernel_each_scale_k: *m = (m_t)&nk_each_scale_f16_rvv, *c = nk_cap_rvv_k; return;
288
+ case nk_kernel_each_sum_k: *m = (m_t)&nk_each_sum_f16_rvv, *c = nk_cap_rvv_k; return;
289
+ case nk_kernel_bilinear_k: *m = (m_t)&nk_bilinear_f16_rvv, *c = nk_cap_rvv_k; return;
290
+ case nk_kernel_mahalanobis_k: *m = (m_t)&nk_mahalanobis_f16_rvv, *c = nk_cap_rvv_k; return;
291
+ case nk_kernel_kabsch_k: *m = (m_t)&nk_kabsch_f16_rvv, *c = nk_cap_rvv_k; return;
292
+ case nk_kernel_rmsd_k: *m = (m_t)&nk_rmsd_f16_rvv, *c = nk_cap_rvv_k; return;
293
+ case nk_kernel_umeyama_k: *m = (m_t)&nk_umeyama_f16_rvv, *c = nk_cap_rvv_k; return;
294
+ default: break;
295
+ }
296
+ #endif
297
+ if (v & nk_cap_serial_k) switch (k) {
298
+ case nk_kernel_dot_k: *m = (m_t)&nk_dot_f16_serial, *c = nk_cap_serial_k; return;
299
+ case nk_kernel_angular_k: *m = (m_t)&nk_angular_f16_serial, *c = nk_cap_serial_k; return;
300
+ case nk_kernel_sqeuclidean_k: *m = (m_t)&nk_sqeuclidean_f16_serial, *c = nk_cap_serial_k; return;
301
+ case nk_kernel_euclidean_k: *m = (m_t)&nk_euclidean_f16_serial, *c = nk_cap_serial_k; return;
302
+ case nk_kernel_jsd_k: *m = (m_t)&nk_jsd_f16_serial, *c = nk_cap_serial_k; return;
303
+ case nk_kernel_kld_k: *m = (m_t)&nk_kld_f16_serial, *c = nk_cap_serial_k; return;
304
+ case nk_kernel_bilinear_k: *m = (m_t)&nk_bilinear_f16_serial, *c = nk_cap_serial_k; return;
305
+ case nk_kernel_mahalanobis_k: *m = (m_t)&nk_mahalanobis_f16_serial, *c = nk_cap_serial_k; return;
306
+ case nk_kernel_each_fma_k: *m = (m_t)&nk_each_fma_f16_serial, *c = nk_cap_serial_k; return;
307
+ case nk_kernel_each_blend_k: *m = (m_t)&nk_each_blend_f16_serial, *c = nk_cap_serial_k; return;
308
+ case nk_kernel_each_scale_k: *m = (m_t)&nk_each_scale_f16_serial, *c = nk_cap_serial_k; return;
309
+ case nk_kernel_each_sum_k: *m = (m_t)&nk_each_sum_f16_serial, *c = nk_cap_serial_k; return;
310
+ case nk_kernel_rmsd_k: *m = (m_t)&nk_rmsd_f16_serial, *c = nk_cap_serial_k; return;
311
+ case nk_kernel_kabsch_k: *m = (m_t)&nk_kabsch_f16_serial, *c = nk_cap_serial_k; return;
312
+ case nk_kernel_umeyama_k: *m = (m_t)&nk_umeyama_f16_serial, *c = nk_cap_serial_k; return;
313
+ case nk_kernel_reduce_moments_k: *m = (m_t)&nk_reduce_moments_f16_serial, *c = nk_cap_serial_k; return;
314
+ case nk_kernel_reduce_minmax_k: *m = (m_t)&nk_reduce_minmax_f16_serial, *c = nk_cap_serial_k; return;
315
+ case nk_kernel_dots_packed_size_k: *m = (m_t)&nk_dots_packed_size_f16_serial, *c = nk_cap_serial_k; return;
316
+ case nk_kernel_dots_pack_k: *m = (m_t)&nk_dots_pack_f16_serial, *c = nk_cap_serial_k; return;
317
+ case nk_kernel_dots_packed_k: *m = (m_t)&nk_dots_packed_f16_serial, *c = nk_cap_serial_k; return;
318
+ case nk_kernel_dots_symmetric_k: *m = (m_t)&nk_dots_symmetric_f16_serial, *c = nk_cap_serial_k; return;
319
+ case nk_kernel_angulars_packed_k: *m = (m_t)&nk_angulars_packed_f16_serial, *c = nk_cap_serial_k; return;
320
+ case nk_kernel_angulars_symmetric_k: *m = (m_t)&nk_angulars_symmetric_f16_serial, *c = nk_cap_serial_k; return;
321
+ case nk_kernel_euclideans_packed_k: *m = (m_t)&nk_euclideans_packed_f16_serial, *c = nk_cap_serial_k; return;
322
+ case nk_kernel_euclideans_symmetric_k:
323
+ *m = (m_t)&nk_euclideans_symmetric_f16_serial, *c = nk_cap_serial_k;
324
+ return;
325
+ case nk_kernel_each_sin_k: *m = (m_t)&nk_each_sin_f16_serial, *c = nk_cap_serial_k; return;
326
+ case nk_kernel_each_cos_k: *m = (m_t)&nk_each_cos_f16_serial, *c = nk_cap_serial_k; return;
327
+ case nk_kernel_each_atan_k: *m = (m_t)&nk_each_atan_f16_serial, *c = nk_cap_serial_k; return;
328
+ case nk_kernel_maxsim_packed_size_k: *m = (m_t)&nk_maxsim_packed_size_f16_serial, *c = nk_cap_serial_k; return;
329
+ case nk_kernel_maxsim_pack_k: *m = (m_t)&nk_maxsim_pack_f16_serial, *c = nk_cap_serial_k; return;
330
+ case nk_kernel_maxsim_packed_k: *m = (m_t)&nk_maxsim_packed_f16_serial, *c = nk_cap_serial_k; return;
331
+ default: break;
332
+ }
333
+
334
+ // Error fallback - zero capability signals lookup failure
335
+ *m = (m_t)nk_error_dense_, *c = 0;
336
+ }
337
+
338
+ void nk_dispatch_f16_init_(nk_capability_t caps) {
339
+ nk_implementations_t *t = &nk_dispatch_table;
340
+ nk_capability_t used;
341
+
342
+ nk_dispatch_f16_find_(caps, nk_kernel_dot_k, (nk_kernel_punned_t *)&t->dot_f16, &used);
343
+ nk_dispatch_f16_find_(caps, nk_kernel_angular_k, (nk_kernel_punned_t *)&t->angular_f16, &used);
344
+ nk_dispatch_f16_find_(caps, nk_kernel_euclidean_k, (nk_kernel_punned_t *)&t->euclidean_f16, &used);
345
+ nk_dispatch_f16_find_(caps, nk_kernel_sqeuclidean_k, (nk_kernel_punned_t *)&t->sqeuclidean_f16, &used);
346
+ nk_dispatch_f16_find_(caps, nk_kernel_bilinear_k, (nk_kernel_punned_t *)&t->bilinear_f16, &used);
347
+ nk_dispatch_f16_find_(caps, nk_kernel_mahalanobis_k, (nk_kernel_punned_t *)&t->mahalanobis_f16, &used);
348
+ nk_dispatch_f16_find_(caps, nk_kernel_kld_k, (nk_kernel_punned_t *)&t->kld_f16, &used);
349
+ nk_dispatch_f16_find_(caps, nk_kernel_jsd_k, (nk_kernel_punned_t *)&t->jsd_f16, &used);
350
+ nk_dispatch_f16_find_(caps, nk_kernel_rmsd_k, (nk_kernel_punned_t *)&t->rmsd_f16, &used);
351
+ nk_dispatch_f16_find_(caps, nk_kernel_kabsch_k, (nk_kernel_punned_t *)&t->kabsch_f16, &used);
352
+ nk_dispatch_f16_find_(caps, nk_kernel_umeyama_k, (nk_kernel_punned_t *)&t->umeyama_f16, &used);
353
+ nk_dispatch_f16_find_(caps, nk_kernel_each_scale_k, (nk_kernel_punned_t *)&t->each_scale_f16, &used);
354
+ nk_dispatch_f16_find_(caps, nk_kernel_each_sum_k, (nk_kernel_punned_t *)&t->each_sum_f16, &used);
355
+ nk_dispatch_f16_find_(caps, nk_kernel_each_blend_k, (nk_kernel_punned_t *)&t->each_blend_f16, &used);
356
+ nk_dispatch_f16_find_(caps, nk_kernel_each_fma_k, (nk_kernel_punned_t *)&t->each_fma_f16, &used);
357
+ nk_dispatch_f16_find_(caps, nk_kernel_each_sin_k, (nk_kernel_punned_t *)&t->each_sin_f16, &used);
358
+ nk_dispatch_f16_find_(caps, nk_kernel_each_cos_k, (nk_kernel_punned_t *)&t->each_cos_f16, &used);
359
+ nk_dispatch_f16_find_(caps, nk_kernel_each_atan_k, (nk_kernel_punned_t *)&t->each_atan_f16, &used);
360
+ nk_dispatch_f16_find_(caps, nk_kernel_reduce_moments_k, (nk_kernel_punned_t *)&t->reduce_moments_f16, &used);
361
+ nk_dispatch_f16_find_(caps, nk_kernel_reduce_minmax_k, (nk_kernel_punned_t *)&t->reduce_minmax_f16, &used);
362
+ nk_dispatch_f16_find_(caps, nk_kernel_dots_packed_size_k, (nk_kernel_punned_t *)&t->dots_packed_size_f16, &used);
363
+ nk_dispatch_f16_find_(caps, nk_kernel_dots_pack_k, (nk_kernel_punned_t *)&t->dots_pack_f16, &used);
364
+ nk_dispatch_f16_find_(caps, nk_kernel_dots_packed_k, (nk_kernel_punned_t *)&t->dots_packed_f16, &used);
365
+ nk_dispatch_f16_find_(caps, nk_kernel_dots_symmetric_k, (nk_kernel_punned_t *)&t->dots_symmetric_f16, &used);
366
+ nk_dispatch_f16_find_(caps, nk_kernel_angulars_packed_k, (nk_kernel_punned_t *)&t->angulars_packed_f16, &used);
367
+ nk_dispatch_f16_find_(caps, nk_kernel_angulars_symmetric_k, (nk_kernel_punned_t *)&t->angulars_symmetric_f16,
368
+ &used);
369
+ nk_dispatch_f16_find_(caps, nk_kernel_euclideans_packed_k, (nk_kernel_punned_t *)&t->euclideans_packed_f16, &used);
370
+ nk_dispatch_f16_find_(caps, nk_kernel_euclideans_symmetric_k, (nk_kernel_punned_t *)&t->euclideans_symmetric_f16,
371
+ &used);
372
+ nk_dispatch_f16_find_(caps, nk_kernel_maxsim_packed_size_k, (nk_kernel_punned_t *)&t->maxsim_packed_size_f16,
373
+ &used);
374
+ nk_dispatch_f16_find_(caps, nk_kernel_maxsim_pack_k, (nk_kernel_punned_t *)&t->maxsim_pack_f16, &used);
375
+ nk_dispatch_f16_find_(caps, nk_kernel_maxsim_packed_k, (nk_kernel_punned_t *)&t->maxsim_packed_f16, &used);
376
+ }
@@ -0,0 +1,58 @@
1
+ /**
2
+ * @brief Dispatch Initialization for F16C Data Types.
3
+ * @file c/dispatch_f16c.c
4
+ * @author Ash Vardanian
5
+ * @date February 3, 2026
6
+ */
7
+ #include "dispatch.h"
8
+
9
+ void nk_dispatch_f16c_find_(nk_capability_t v, nk_kernel_kind_t k, nk_kernel_punned_t *m, nk_capability_t *c) {
10
+ typedef nk_kernel_punned_t m_t;
11
+ #if NK_TARGET_SVEHALF
12
+ if (v & nk_cap_svehalf_k) switch (k) {
13
+ case nk_kernel_dot_k: *m = (m_t)&nk_dot_f16c_svehalf, *c = nk_cap_svehalf_k; return;
14
+ case nk_kernel_vdot_k: *m = (m_t)&nk_vdot_f16c_svehalf, *c = nk_cap_svehalf_k; return;
15
+ default: break;
16
+ }
17
+ #endif
18
+ #if NK_TARGET_NEONFHM
19
+ if (v & nk_cap_neonfhm_k) switch (k) {
20
+ case nk_kernel_dot_k: *m = (m_t)&nk_dot_f16c_neonfhm, *c = nk_cap_neonfhm_k; return;
21
+ case nk_kernel_vdot_k: *m = (m_t)&nk_vdot_f16c_neonfhm, *c = nk_cap_neonfhm_k; return;
22
+ default: break;
23
+ }
24
+ #endif
25
+ #if NK_TARGET_NEONHALF
26
+ if (v & nk_cap_neonhalf_k) switch (k) {
27
+ case nk_kernel_dot_k: *m = (m_t)&nk_dot_f16c_neonhalf, *c = nk_cap_neonhalf_k; return;
28
+ case nk_kernel_vdot_k: *m = (m_t)&nk_vdot_f16c_neonhalf, *c = nk_cap_neonhalf_k; return;
29
+ case nk_kernel_bilinear_k: *m = (m_t)&nk_bilinear_f16c_neonhalf, *c = nk_cap_neonbfdot_k; return;
30
+ default: break;
31
+ }
32
+ #endif
33
+ #if NK_TARGET_HASWELL
34
+ if (v & nk_cap_haswell_k) switch (k) {
35
+ case nk_kernel_dot_k: *m = (m_t)&nk_dot_f16c_haswell, *c = nk_cap_haswell_k; return;
36
+ case nk_kernel_vdot_k: *m = (m_t)&nk_vdot_f16c_haswell, *c = nk_cap_haswell_k; return;
37
+ default: break;
38
+ }
39
+ #endif
40
+ if (v & nk_cap_serial_k) switch (k) {
41
+ case nk_kernel_dot_k: *m = (m_t)&nk_dot_f16c_serial, *c = nk_cap_serial_k; return;
42
+ case nk_kernel_vdot_k: *m = (m_t)&nk_vdot_f16c_serial, *c = nk_cap_serial_k; return;
43
+ case nk_kernel_bilinear_k: *m = (m_t)&nk_bilinear_f16c_serial, *c = nk_cap_serial_k; return;
44
+ default: break;
45
+ }
46
+
47
+ // Error fallback - zero capability signals lookup failure
48
+ *m = (m_t)nk_error_dense_, *c = 0;
49
+ }
50
+
51
+ void nk_dispatch_f16c_init_(nk_capability_t caps) {
52
+ nk_implementations_t *t = &nk_dispatch_table;
53
+ nk_capability_t used;
54
+
55
+ nk_dispatch_f16c_find_(caps, nk_kernel_dot_k, (nk_kernel_punned_t *)&t->dot_f16c, &used);
56
+ nk_dispatch_f16c_find_(caps, nk_kernel_vdot_k, (nk_kernel_punned_t *)&t->vdot_f16c, &used);
57
+ nk_dispatch_f16c_find_(caps, nk_kernel_bilinear_k, (nk_kernel_punned_t *)&t->bilinear_f16c, &used);
58
+ }