numkong 7.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (294)
  1. package/LICENSE +201 -0
  2. package/README.md +495 -0
  3. package/binding.gyp +540 -0
  4. package/c/dispatch.h +512 -0
  5. package/c/dispatch_bf16.c +389 -0
  6. package/c/dispatch_bf16c.c +52 -0
  7. package/c/dispatch_e2m3.c +263 -0
  8. package/c/dispatch_e3m2.c +243 -0
  9. package/c/dispatch_e4m3.c +276 -0
  10. package/c/dispatch_e5m2.c +272 -0
  11. package/c/dispatch_f16.c +376 -0
  12. package/c/dispatch_f16c.c +58 -0
  13. package/c/dispatch_f32.c +378 -0
  14. package/c/dispatch_f32c.c +99 -0
  15. package/c/dispatch_f64.c +296 -0
  16. package/c/dispatch_f64c.c +98 -0
  17. package/c/dispatch_i16.c +96 -0
  18. package/c/dispatch_i32.c +89 -0
  19. package/c/dispatch_i4.c +150 -0
  20. package/c/dispatch_i64.c +86 -0
  21. package/c/dispatch_i8.c +289 -0
  22. package/c/dispatch_other.c +330 -0
  23. package/c/dispatch_u1.c +148 -0
  24. package/c/dispatch_u16.c +124 -0
  25. package/c/dispatch_u32.c +118 -0
  26. package/c/dispatch_u4.c +150 -0
  27. package/c/dispatch_u64.c +102 -0
  28. package/c/dispatch_u8.c +303 -0
  29. package/c/numkong.c +950 -0
  30. package/include/README.md +573 -0
  31. package/include/module.modulemap +129 -0
  32. package/include/numkong/attention/sapphireamx.h +1361 -0
  33. package/include/numkong/attention/sme.h +2066 -0
  34. package/include/numkong/attention.h +49 -0
  35. package/include/numkong/capabilities.h +748 -0
  36. package/include/numkong/cast/README.md +262 -0
  37. package/include/numkong/cast/haswell.h +975 -0
  38. package/include/numkong/cast/icelake.h +470 -0
  39. package/include/numkong/cast/neon.h +1192 -0
  40. package/include/numkong/cast/rvv.h +1021 -0
  41. package/include/numkong/cast/sapphire.h +262 -0
  42. package/include/numkong/cast/serial.h +2262 -0
  43. package/include/numkong/cast/skylake.h +856 -0
  44. package/include/numkong/cast/v128relaxed.h +180 -0
  45. package/include/numkong/cast.h +230 -0
  46. package/include/numkong/curved/README.md +223 -0
  47. package/include/numkong/curved/genoa.h +182 -0
  48. package/include/numkong/curved/haswell.h +276 -0
  49. package/include/numkong/curved/neon.h +205 -0
  50. package/include/numkong/curved/neonbfdot.h +212 -0
  51. package/include/numkong/curved/neonhalf.h +212 -0
  52. package/include/numkong/curved/rvv.h +305 -0
  53. package/include/numkong/curved/serial.h +207 -0
  54. package/include/numkong/curved/skylake.h +457 -0
  55. package/include/numkong/curved/smef64.h +506 -0
  56. package/include/numkong/curved.h +517 -0
  57. package/include/numkong/curved.hpp +144 -0
  58. package/include/numkong/dot/README.md +425 -0
  59. package/include/numkong/dot/alder.h +563 -0
  60. package/include/numkong/dot/genoa.h +315 -0
  61. package/include/numkong/dot/haswell.h +1688 -0
  62. package/include/numkong/dot/icelake.h +883 -0
  63. package/include/numkong/dot/neon.h +818 -0
  64. package/include/numkong/dot/neonbfdot.h +244 -0
  65. package/include/numkong/dot/neonfhm.h +360 -0
  66. package/include/numkong/dot/neonhalf.h +198 -0
  67. package/include/numkong/dot/neonsdot.h +508 -0
  68. package/include/numkong/dot/rvv.h +714 -0
  69. package/include/numkong/dot/rvvbb.h +72 -0
  70. package/include/numkong/dot/rvvbf16.h +123 -0
  71. package/include/numkong/dot/rvvhalf.h +129 -0
  72. package/include/numkong/dot/sapphire.h +141 -0
  73. package/include/numkong/dot/serial.h +838 -0
  74. package/include/numkong/dot/sierra.h +405 -0
  75. package/include/numkong/dot/skylake.h +1084 -0
  76. package/include/numkong/dot/sve.h +379 -0
  77. package/include/numkong/dot/svebfdot.h +74 -0
  78. package/include/numkong/dot/svehalf.h +123 -0
  79. package/include/numkong/dot/v128relaxed.h +1258 -0
  80. package/include/numkong/dot.h +1070 -0
  81. package/include/numkong/dot.hpp +94 -0
  82. package/include/numkong/dots/README.md +496 -0
  83. package/include/numkong/dots/alder.h +114 -0
  84. package/include/numkong/dots/genoa.h +94 -0
  85. package/include/numkong/dots/haswell.h +295 -0
  86. package/include/numkong/dots/icelake.h +171 -0
  87. package/include/numkong/dots/neon.h +120 -0
  88. package/include/numkong/dots/neonbfdot.h +58 -0
  89. package/include/numkong/dots/neonfhm.h +94 -0
  90. package/include/numkong/dots/neonhalf.h +57 -0
  91. package/include/numkong/dots/neonsdot.h +108 -0
  92. package/include/numkong/dots/rvv.h +2486 -0
  93. package/include/numkong/dots/sapphireamx.h +3973 -0
  94. package/include/numkong/dots/serial.h +2844 -0
  95. package/include/numkong/dots/sierra.h +97 -0
  96. package/include/numkong/dots/skylake.h +196 -0
  97. package/include/numkong/dots/sme.h +5372 -0
  98. package/include/numkong/dots/smebi32.h +461 -0
  99. package/include/numkong/dots/smef64.h +1318 -0
  100. package/include/numkong/dots/smehalf.h +47 -0
  101. package/include/numkong/dots/v128relaxed.h +294 -0
  102. package/include/numkong/dots.h +2804 -0
  103. package/include/numkong/dots.hpp +639 -0
  104. package/include/numkong/each/README.md +469 -0
  105. package/include/numkong/each/haswell.h +1658 -0
  106. package/include/numkong/each/icelake.h +272 -0
  107. package/include/numkong/each/neon.h +1104 -0
  108. package/include/numkong/each/neonbfdot.h +212 -0
  109. package/include/numkong/each/neonhalf.h +410 -0
  110. package/include/numkong/each/rvv.h +1121 -0
  111. package/include/numkong/each/sapphire.h +477 -0
  112. package/include/numkong/each/serial.h +260 -0
  113. package/include/numkong/each/skylake.h +1562 -0
  114. package/include/numkong/each.h +2146 -0
  115. package/include/numkong/each.hpp +434 -0
  116. package/include/numkong/geospatial/README.md +147 -0
  117. package/include/numkong/geospatial/haswell.h +593 -0
  118. package/include/numkong/geospatial/neon.h +571 -0
  119. package/include/numkong/geospatial/rvv.h +701 -0
  120. package/include/numkong/geospatial/serial.h +309 -0
  121. package/include/numkong/geospatial/skylake.h +577 -0
  122. package/include/numkong/geospatial/v128relaxed.h +613 -0
  123. package/include/numkong/geospatial.h +453 -0
  124. package/include/numkong/geospatial.hpp +235 -0
  125. package/include/numkong/matrix.hpp +336 -0
  126. package/include/numkong/maxsim/README.md +187 -0
  127. package/include/numkong/maxsim/alder.h +511 -0
  128. package/include/numkong/maxsim/genoa.h +115 -0
  129. package/include/numkong/maxsim/haswell.h +553 -0
  130. package/include/numkong/maxsim/icelake.h +480 -0
  131. package/include/numkong/maxsim/neonsdot.h +394 -0
  132. package/include/numkong/maxsim/sapphireamx.h +877 -0
  133. package/include/numkong/maxsim/serial.h +490 -0
  134. package/include/numkong/maxsim/sme.h +929 -0
  135. package/include/numkong/maxsim/v128relaxed.h +280 -0
  136. package/include/numkong/maxsim.h +571 -0
  137. package/include/numkong/maxsim.hpp +133 -0
  138. package/include/numkong/mesh/README.md +227 -0
  139. package/include/numkong/mesh/haswell.h +2235 -0
  140. package/include/numkong/mesh/neon.h +1329 -0
  141. package/include/numkong/mesh/neonbfdot.h +842 -0
  142. package/include/numkong/mesh/neonhalf.h +616 -0
  143. package/include/numkong/mesh/rvv.h +916 -0
  144. package/include/numkong/mesh/serial.h +742 -0
  145. package/include/numkong/mesh/skylake.h +1135 -0
  146. package/include/numkong/mesh/v128relaxed.h +1052 -0
  147. package/include/numkong/mesh.h +652 -0
  148. package/include/numkong/mesh.hpp +762 -0
  149. package/include/numkong/numkong.h +78 -0
  150. package/include/numkong/numkong.hpp +57 -0
  151. package/include/numkong/probability/README.md +173 -0
  152. package/include/numkong/probability/haswell.h +267 -0
  153. package/include/numkong/probability/neon.h +225 -0
  154. package/include/numkong/probability/rvv.h +409 -0
  155. package/include/numkong/probability/serial.h +169 -0
  156. package/include/numkong/probability/skylake.h +324 -0
  157. package/include/numkong/probability.h +383 -0
  158. package/include/numkong/probability.hpp +120 -0
  159. package/include/numkong/random.h +50 -0
  160. package/include/numkong/random.hpp +285 -0
  161. package/include/numkong/reduce/README.md +547 -0
  162. package/include/numkong/reduce/alder.h +632 -0
  163. package/include/numkong/reduce/genoa.h +201 -0
  164. package/include/numkong/reduce/haswell.h +3783 -0
  165. package/include/numkong/reduce/icelake.h +549 -0
  166. package/include/numkong/reduce/neon.h +3841 -0
  167. package/include/numkong/reduce/neonbfdot.h +353 -0
  168. package/include/numkong/reduce/neonfhm.h +665 -0
  169. package/include/numkong/reduce/neonhalf.h +157 -0
  170. package/include/numkong/reduce/neonsdot.h +357 -0
  171. package/include/numkong/reduce/rvv.h +3407 -0
  172. package/include/numkong/reduce/serial.h +757 -0
  173. package/include/numkong/reduce/sierra.h +338 -0
  174. package/include/numkong/reduce/skylake.h +3792 -0
  175. package/include/numkong/reduce/v128relaxed.h +2302 -0
  176. package/include/numkong/reduce.h +1597 -0
  177. package/include/numkong/reduce.hpp +633 -0
  178. package/include/numkong/scalar/README.md +89 -0
  179. package/include/numkong/scalar/haswell.h +113 -0
  180. package/include/numkong/scalar/neon.h +122 -0
  181. package/include/numkong/scalar/neonhalf.h +70 -0
  182. package/include/numkong/scalar/rvv.h +211 -0
  183. package/include/numkong/scalar/sapphire.h +63 -0
  184. package/include/numkong/scalar/serial.h +332 -0
  185. package/include/numkong/scalar/v128relaxed.h +56 -0
  186. package/include/numkong/scalar.h +683 -0
  187. package/include/numkong/set/README.md +179 -0
  188. package/include/numkong/set/haswell.h +334 -0
  189. package/include/numkong/set/icelake.h +485 -0
  190. package/include/numkong/set/neon.h +364 -0
  191. package/include/numkong/set/rvv.h +226 -0
  192. package/include/numkong/set/rvvbb.h +117 -0
  193. package/include/numkong/set/serial.h +174 -0
  194. package/include/numkong/set/sve.h +185 -0
  195. package/include/numkong/set/v128relaxed.h +240 -0
  196. package/include/numkong/set.h +457 -0
  197. package/include/numkong/set.hpp +114 -0
  198. package/include/numkong/sets/README.md +149 -0
  199. package/include/numkong/sets/haswell.h +63 -0
  200. package/include/numkong/sets/icelake.h +66 -0
  201. package/include/numkong/sets/neon.h +61 -0
  202. package/include/numkong/sets/serial.h +43 -0
  203. package/include/numkong/sets/smebi32.h +1099 -0
  204. package/include/numkong/sets/v128relaxed.h +58 -0
  205. package/include/numkong/sets.h +339 -0
  206. package/include/numkong/sparse/README.md +156 -0
  207. package/include/numkong/sparse/icelake.h +463 -0
  208. package/include/numkong/sparse/neon.h +288 -0
  209. package/include/numkong/sparse/serial.h +117 -0
  210. package/include/numkong/sparse/sve2.h +507 -0
  211. package/include/numkong/sparse/turin.h +322 -0
  212. package/include/numkong/sparse.h +363 -0
  213. package/include/numkong/sparse.hpp +113 -0
  214. package/include/numkong/spatial/README.md +435 -0
  215. package/include/numkong/spatial/alder.h +607 -0
  216. package/include/numkong/spatial/genoa.h +290 -0
  217. package/include/numkong/spatial/haswell.h +960 -0
  218. package/include/numkong/spatial/icelake.h +586 -0
  219. package/include/numkong/spatial/neon.h +773 -0
  220. package/include/numkong/spatial/neonbfdot.h +165 -0
  221. package/include/numkong/spatial/neonhalf.h +118 -0
  222. package/include/numkong/spatial/neonsdot.h +261 -0
  223. package/include/numkong/spatial/rvv.h +984 -0
  224. package/include/numkong/spatial/rvvbf16.h +123 -0
  225. package/include/numkong/spatial/rvvhalf.h +117 -0
  226. package/include/numkong/spatial/sapphire.h +343 -0
  227. package/include/numkong/spatial/serial.h +346 -0
  228. package/include/numkong/spatial/sierra.h +323 -0
  229. package/include/numkong/spatial/skylake.h +606 -0
  230. package/include/numkong/spatial/sve.h +224 -0
  231. package/include/numkong/spatial/svebfdot.h +122 -0
  232. package/include/numkong/spatial/svehalf.h +109 -0
  233. package/include/numkong/spatial/v128relaxed.h +717 -0
  234. package/include/numkong/spatial.h +1425 -0
  235. package/include/numkong/spatial.hpp +183 -0
  236. package/include/numkong/spatials/README.md +580 -0
  237. package/include/numkong/spatials/alder.h +94 -0
  238. package/include/numkong/spatials/genoa.h +94 -0
  239. package/include/numkong/spatials/haswell.h +219 -0
  240. package/include/numkong/spatials/icelake.h +113 -0
  241. package/include/numkong/spatials/neon.h +109 -0
  242. package/include/numkong/spatials/neonbfdot.h +60 -0
  243. package/include/numkong/spatials/neonfhm.h +92 -0
  244. package/include/numkong/spatials/neonhalf.h +58 -0
  245. package/include/numkong/spatials/neonsdot.h +109 -0
  246. package/include/numkong/spatials/rvv.h +1960 -0
  247. package/include/numkong/spatials/sapphireamx.h +1149 -0
  248. package/include/numkong/spatials/serial.h +226 -0
  249. package/include/numkong/spatials/sierra.h +96 -0
  250. package/include/numkong/spatials/skylake.h +184 -0
  251. package/include/numkong/spatials/sme.h +1901 -0
  252. package/include/numkong/spatials/smef64.h +465 -0
  253. package/include/numkong/spatials/v128relaxed.h +240 -0
  254. package/include/numkong/spatials.h +3021 -0
  255. package/include/numkong/spatials.hpp +508 -0
  256. package/include/numkong/tensor.hpp +1592 -0
  257. package/include/numkong/trigonometry/README.md +184 -0
  258. package/include/numkong/trigonometry/haswell.h +652 -0
  259. package/include/numkong/trigonometry/neon.h +639 -0
  260. package/include/numkong/trigonometry/rvv.h +699 -0
  261. package/include/numkong/trigonometry/serial.h +703 -0
  262. package/include/numkong/trigonometry/skylake.h +721 -0
  263. package/include/numkong/trigonometry/v128relaxed.h +666 -0
  264. package/include/numkong/trigonometry.h +467 -0
  265. package/include/numkong/trigonometry.hpp +166 -0
  266. package/include/numkong/types.h +1384 -0
  267. package/include/numkong/types.hpp +5603 -0
  268. package/include/numkong/vector.hpp +698 -0
  269. package/javascript/README.md +246 -0
  270. package/javascript/dist/cjs/numkong-wasm.d.ts +166 -0
  271. package/javascript/dist/cjs/numkong-wasm.js +617 -0
  272. package/javascript/dist/cjs/numkong.d.ts +343 -0
  273. package/javascript/dist/cjs/numkong.js +523 -0
  274. package/javascript/dist/cjs/package.json +3 -0
  275. package/javascript/dist/cjs/types.d.ts +284 -0
  276. package/javascript/dist/cjs/types.js +653 -0
  277. package/javascript/dist/esm/numkong-wasm.d.ts +166 -0
  278. package/javascript/dist/esm/numkong-wasm.js +595 -0
  279. package/javascript/dist/esm/numkong.d.ts +343 -0
  280. package/javascript/dist/esm/numkong.js +452 -0
  281. package/javascript/dist/esm/package.json +3 -0
  282. package/javascript/dist/esm/types.d.ts +284 -0
  283. package/javascript/dist/esm/types.js +630 -0
  284. package/javascript/dist-package-cjs.json +3 -0
  285. package/javascript/dist-package-esm.json +3 -0
  286. package/javascript/node-gyp-build.d.ts +1 -0
  287. package/javascript/numkong-wasm.ts +756 -0
  288. package/javascript/numkong.c +689 -0
  289. package/javascript/numkong.ts +575 -0
  290. package/javascript/tsconfig-base.json +39 -0
  291. package/javascript/tsconfig-cjs.json +8 -0
  292. package/javascript/tsconfig-esm.json +8 -0
  293. package/javascript/types.ts +674 -0
  294. package/package.json +87 -0
@@ -0,0 +1,118 @@
1
+ /**
2
+ * @brief Dispatch Initialization for U32 Data Types.
3
+ * @file c/dispatch_u32.c
4
+ * @author Ash Vardanian
5
+ * @date February 3, 2026
6
+ */
7
+ #include "dispatch.h"
8
+
9
+ void nk_dispatch_u32_find_(nk_capability_t v, nk_kernel_kind_t k, nk_kernel_punned_t *m, nk_capability_t *c) {
10
+ typedef nk_kernel_punned_t m_t;
11
+ #if NK_TARGET_V128RELAXED
12
+ if (v & nk_cap_v128relaxed_k) switch (k) {
13
+ case nk_kernel_jaccard_k: *m = (m_t)&nk_jaccard_u32_v128relaxed, *c = nk_cap_v128relaxed_k; return;
14
+ case nk_kernel_reduce_moments_k:
15
+ *m = (m_t)&nk_reduce_moments_u32_v128relaxed, *c = nk_cap_v128relaxed_k;
16
+ return;
17
+ case nk_kernel_reduce_minmax_k: *m = (m_t)&nk_reduce_minmax_u32_v128relaxed, *c = nk_cap_v128relaxed_k; return;
18
+ default: break;
19
+ }
20
+ #endif
21
+ #if NK_TARGET_SVE2
22
+ if (v & nk_cap_sve2_k) switch (k) {
23
+ case nk_kernel_sparse_intersect_k: *m = (m_t)&nk_sparse_intersect_u32_sve2, *c = nk_cap_sve2_k; return;
24
+ default: break;
25
+ }
26
+ #endif
27
+ #if NK_TARGET_SVE
28
+ if (v & nk_cap_sve_k) switch (k) {
29
+ case nk_kernel_jaccard_k: *m = (m_t)&nk_jaccard_u32_sve, *c = nk_cap_sve_k; return;
30
+ default: break;
31
+ }
32
+ #endif
33
+ #if NK_TARGET_NEON
34
+ if (v & nk_cap_neon_k) switch (k) {
35
+ case nk_kernel_sparse_intersect_k: *m = (m_t)&nk_sparse_intersect_u32_neon, *c = nk_cap_neon_k; return;
36
+ case nk_kernel_each_fma_k: *m = (m_t)&nk_each_fma_u32_neon, *c = nk_cap_neon_k; return;
37
+ case nk_kernel_each_scale_k: *m = (m_t)&nk_each_scale_u32_neon, *c = nk_cap_neon_k; return;
38
+ case nk_kernel_each_sum_k: *m = (m_t)&nk_each_sum_u32_neon, *c = nk_cap_neon_k; return;
39
+ case nk_kernel_reduce_moments_k: *m = (m_t)&nk_reduce_moments_u32_neon, *c = nk_cap_neon_k; return;
40
+ case nk_kernel_reduce_minmax_k: *m = (m_t)&nk_reduce_minmax_u32_neon, *c = nk_cap_neon_k; return;
41
+ case nk_kernel_jaccard_k: *m = (m_t)&nk_jaccard_u32_neon, *c = nk_cap_neon_k; return;
42
+ default: break;
43
+ }
44
+ #endif
45
+ #if NK_TARGET_TURIN
46
+ if (v & nk_cap_turin_k) switch (k) {
47
+ case nk_kernel_sparse_intersect_k: *m = (m_t)&nk_sparse_intersect_u32_turin, *c = nk_cap_skylake_k; return;
48
+ default: break;
49
+ }
50
+ #endif
51
+ #if NK_TARGET_ICELAKE
52
+ if (v & nk_cap_icelake_k) switch (k) {
53
+ case nk_kernel_sparse_intersect_k: *m = (m_t)&nk_sparse_intersect_u32_icelake, *c = nk_cap_skylake_k; return;
54
+ case nk_kernel_each_sum_k: *m = (m_t)&nk_each_sum_u32_icelake, *c = nk_cap_icelake_k; return;
55
+ case nk_kernel_jaccard_k: *m = (m_t)&nk_jaccard_u32_icelake, *c = nk_cap_icelake_k; return;
56
+ default: break;
57
+ }
58
+ #endif
59
+ #if NK_TARGET_SKYLAKE
60
+ if (v & nk_cap_skylake_k) switch (k) {
61
+ case nk_kernel_each_fma_k: *m = (m_t)&nk_each_fma_u32_skylake, *c = nk_cap_skylake_k; return;
62
+ case nk_kernel_each_scale_k: *m = (m_t)&nk_each_scale_u32_skylake, *c = nk_cap_skylake_k; return;
63
+ case nk_kernel_reduce_moments_k: *m = (m_t)&nk_reduce_moments_u32_skylake, *c = nk_cap_skylake_k; return;
64
+ case nk_kernel_reduce_minmax_k: *m = (m_t)&nk_reduce_minmax_u32_skylake, *c = nk_cap_skylake_k; return;
65
+ default: break;
66
+ }
67
+ #endif
68
+ #if NK_TARGET_HASWELL
69
+ if (v & nk_cap_haswell_k) switch (k) {
70
+ case nk_kernel_each_fma_k: *m = (m_t)&nk_each_fma_u32_haswell, *c = nk_cap_haswell_k; return;
71
+ case nk_kernel_each_scale_k: *m = (m_t)&nk_each_scale_u32_haswell, *c = nk_cap_haswell_k; return;
72
+ case nk_kernel_each_sum_k: *m = (m_t)&nk_each_sum_u32_haswell, *c = nk_cap_haswell_k; return;
73
+ case nk_kernel_reduce_moments_k: *m = (m_t)&nk_reduce_moments_u32_haswell, *c = nk_cap_haswell_k; return;
74
+ case nk_kernel_reduce_minmax_k: *m = (m_t)&nk_reduce_minmax_u32_haswell, *c = nk_cap_haswell_k; return;
75
+ case nk_kernel_jaccard_k: *m = (m_t)&nk_jaccard_u32_haswell, *c = nk_cap_haswell_k; return;
76
+ default: break;
77
+ }
78
+ #endif
79
+ #if NK_TARGET_RVV
80
+ if (v & nk_cap_rvv_k) switch (k) {
81
+ case nk_kernel_jaccard_k: *m = (m_t)&nk_jaccard_u32_rvv, *c = nk_cap_rvv_k; return;
82
+ case nk_kernel_reduce_moments_k: *m = (m_t)&nk_reduce_moments_u32_rvv, *c = nk_cap_rvv_k; return;
83
+ case nk_kernel_reduce_minmax_k: *m = (m_t)&nk_reduce_minmax_u32_rvv, *c = nk_cap_rvv_k; return;
84
+ case nk_kernel_each_fma_k: *m = (m_t)&nk_each_fma_u32_rvv, *c = nk_cap_rvv_k; return;
85
+ case nk_kernel_each_scale_k: *m = (m_t)&nk_each_scale_u32_rvv, *c = nk_cap_rvv_k; return;
86
+ case nk_kernel_each_sum_k: *m = (m_t)&nk_each_sum_u32_rvv, *c = nk_cap_rvv_k; return;
87
+ default: break;
88
+ }
89
+ #endif
90
+ if (v & nk_cap_serial_k) switch (k) {
91
+ case nk_kernel_sparse_intersect_k: *m = (m_t)&nk_sparse_intersect_u32_serial, *c = nk_cap_serial_k; return;
92
+ case nk_kernel_jaccard_k: *m = (m_t)&nk_jaccard_u32_serial, *c = nk_cap_serial_k; return;
93
+ case nk_kernel_each_fma_k: *m = (m_t)&nk_each_fma_u32_serial, *c = nk_cap_serial_k; return;
94
+ case nk_kernel_each_scale_k: *m = (m_t)&nk_each_scale_u32_serial, *c = nk_cap_serial_k; return;
95
+ case nk_kernel_each_sum_k: *m = (m_t)&nk_each_sum_u32_serial, *c = nk_cap_serial_k; return;
96
+ case nk_kernel_each_blend_k: *m = (m_t)&nk_each_blend_u32_serial, *c = nk_cap_serial_k; return;
97
+ case nk_kernel_reduce_moments_k: *m = (m_t)&nk_reduce_moments_u32_serial, *c = nk_cap_serial_k; return;
98
+ case nk_kernel_reduce_minmax_k: *m = (m_t)&nk_reduce_minmax_u32_serial, *c = nk_cap_serial_k; return;
99
+ default: break;
100
+ }
101
+
102
+ // Error fallback - zero capability signals lookup failure
103
+ *m = (m_t)nk_error_dense_, *c = 0;
104
+ }
105
+
106
+ void nk_dispatch_u32_init_(nk_capability_t caps) {
107
+ nk_implementations_t *t = &nk_dispatch_table;
108
+ nk_capability_t used;
109
+
110
+ nk_dispatch_u32_find_(caps, nk_kernel_jaccard_k, (nk_kernel_punned_t *)&t->jaccard_u32, &used);
111
+ nk_dispatch_u32_find_(caps, nk_kernel_sparse_intersect_k, (nk_kernel_punned_t *)&t->sparse_intersect_u32, &used);
112
+ nk_dispatch_u32_find_(caps, nk_kernel_each_scale_k, (nk_kernel_punned_t *)&t->each_scale_u32, &used);
113
+ nk_dispatch_u32_find_(caps, nk_kernel_each_sum_k, (nk_kernel_punned_t *)&t->each_sum_u32, &used);
114
+ nk_dispatch_u32_find_(caps, nk_kernel_each_blend_k, (nk_kernel_punned_t *)&t->each_blend_u32, &used);
115
+ nk_dispatch_u32_find_(caps, nk_kernel_each_fma_k, (nk_kernel_punned_t *)&t->each_fma_u32, &used);
116
+ nk_dispatch_u32_find_(caps, nk_kernel_reduce_moments_k, (nk_kernel_punned_t *)&t->reduce_moments_u32, &used);
117
+ nk_dispatch_u32_find_(caps, nk_kernel_reduce_minmax_k, (nk_kernel_punned_t *)&t->reduce_minmax_u32, &used);
118
+ }
@@ -0,0 +1,150 @@
1
+ /**
2
+ * @brief Dispatch Initialization for U4 Data Types.
3
+ * @file c/dispatch_u4.c
4
+ * @author Ash Vardanian
5
+ * @date February 3, 2026
6
+ */
7
+ #include "dispatch.h"
8
+
9
+ #ifdef __cplusplus
10
+ extern "C" {
11
+ #endif
12
+
13
+ void nk_dispatch_u4_find_(nk_capability_t v, nk_kernel_kind_t k, nk_kernel_punned_t *m, nk_capability_t *c) {
14
+ typedef nk_kernel_punned_t m_t;
15
+ #if NK_TARGET_V128RELAXED
16
+ if (v & nk_cap_v128relaxed_k) switch (k) {
17
+ case nk_kernel_dot_k: *m = (m_t)&nk_dot_u4_v128relaxed, *c = nk_cap_v128relaxed_k; return;
18
+ case nk_kernel_dots_packed_size_k:
19
+ *m = (m_t)&nk_dots_packed_size_u4_v128relaxed, *c = nk_cap_v128relaxed_k;
20
+ return;
21
+ case nk_kernel_dots_pack_k: *m = (m_t)&nk_dots_pack_u4_v128relaxed, *c = nk_cap_v128relaxed_k; return;
22
+ case nk_kernel_dots_packed_k: *m = (m_t)&nk_dots_packed_u4_v128relaxed, *c = nk_cap_v128relaxed_k; return;
23
+ case nk_kernel_dots_symmetric_k: *m = (m_t)&nk_dots_symmetric_u4_v128relaxed, *c = nk_cap_v128relaxed_k; return;
24
+ default: break;
25
+ }
26
+ #endif
27
+ #if NK_TARGET_SME
28
+ if (v & nk_cap_sme_k) switch (k) {
29
+ case nk_kernel_dots_packed_size_k: *m = (m_t)&nk_dots_packed_size_u4_sme, *c = nk_cap_sme_k; return;
30
+ case nk_kernel_dots_pack_k: *m = (m_t)&nk_dots_pack_u4_sme, *c = nk_cap_sme_k; return;
31
+ case nk_kernel_dots_packed_k: *m = (m_t)&nk_dots_packed_u4_sme, *c = nk_cap_sme_k; return;
32
+ case nk_kernel_dots_symmetric_k: *m = (m_t)&nk_dots_symmetric_u4_sme, *c = nk_cap_sme_k; return;
33
+ case nk_kernel_angulars_packed_k: *m = (m_t)&nk_angulars_packed_u4_sme, *c = nk_cap_sme_k; return;
34
+ case nk_kernel_angulars_symmetric_k: *m = (m_t)&nk_angulars_symmetric_u4_sme, *c = nk_cap_sme_k; return;
35
+ case nk_kernel_euclideans_packed_k: *m = (m_t)&nk_euclideans_packed_u4_sme, *c = nk_cap_sme_k; return;
36
+ case nk_kernel_euclideans_symmetric_k: *m = (m_t)&nk_euclideans_symmetric_u4_sme, *c = nk_cap_sme_k; return;
37
+ default: break;
38
+ }
39
+ #endif
40
+ #if NK_TARGET_NEONSDOT
41
+ if (v & nk_cap_neonsdot_k) switch (k) {
42
+ case nk_kernel_dot_k: *m = (m_t)&nk_dot_u4_neonsdot, *c = nk_cap_neonsdot_k; return;
43
+ case nk_kernel_dots_packed_size_k: *m = (m_t)&nk_dots_packed_size_u4_neonsdot, *c = nk_cap_neonsdot_k; return;
44
+ case nk_kernel_dots_pack_k: *m = (m_t)&nk_dots_pack_u4_neonsdot, *c = nk_cap_neonsdot_k; return;
45
+ case nk_kernel_dots_packed_k: *m = (m_t)&nk_dots_packed_u4_neonsdot, *c = nk_cap_neonsdot_k; return;
46
+ case nk_kernel_dots_symmetric_k: *m = (m_t)&nk_dots_symmetric_u4_neonsdot, *c = nk_cap_neonsdot_k; return;
47
+ case nk_kernel_angulars_packed_k: *m = (m_t)&nk_angulars_packed_u4_neonsdot, *c = nk_cap_neonsdot_k; return;
48
+ case nk_kernel_angulars_symmetric_k:
49
+ *m = (m_t)&nk_angulars_symmetric_u4_neonsdot, *c = nk_cap_neonsdot_k;
50
+ return;
51
+ case nk_kernel_euclideans_packed_k: *m = (m_t)&nk_euclideans_packed_u4_neonsdot, *c = nk_cap_neonsdot_k; return;
52
+ case nk_kernel_euclideans_symmetric_k:
53
+ *m = (m_t)&nk_euclideans_symmetric_u4_neonsdot, *c = nk_cap_neonsdot_k;
54
+ return;
55
+ default: break;
56
+ }
57
+ #endif
58
+ #if NK_TARGET_ICELAKE
59
+ if (v & nk_cap_icelake_k) switch (k) {
60
+ case nk_kernel_dot_k: *m = (m_t)&nk_dot_u4_icelake, *c = nk_cap_icelake_k; return;
61
+ case nk_kernel_angular_k: *m = (m_t)&nk_angular_u4_icelake, *c = nk_cap_icelake_k; return;
62
+ case nk_kernel_sqeuclidean_k: *m = (m_t)&nk_sqeuclidean_u4_icelake, *c = nk_cap_icelake_k; return;
63
+ case nk_kernel_euclidean_k: *m = (m_t)&nk_euclidean_u4_icelake, *c = nk_cap_icelake_k; return;
64
+ case nk_kernel_dots_packed_size_k: *m = (m_t)&nk_dots_packed_size_u4_icelake, *c = nk_cap_icelake_k; return;
65
+ case nk_kernel_dots_pack_k: *m = (m_t)&nk_dots_pack_u4_icelake, *c = nk_cap_icelake_k; return;
66
+ case nk_kernel_dots_packed_k: *m = (m_t)&nk_dots_packed_u4_icelake, *c = nk_cap_icelake_k; return;
67
+ case nk_kernel_dots_symmetric_k: *m = (m_t)&nk_dots_symmetric_u4_icelake, *c = nk_cap_icelake_k; return;
68
+ case nk_kernel_angulars_packed_k: *m = (m_t)&nk_angulars_packed_u4_icelake, *c = nk_cap_icelake_k; return;
69
+ case nk_kernel_angulars_symmetric_k: *m = (m_t)&nk_angulars_symmetric_u4_icelake, *c = nk_cap_icelake_k; return;
70
+ case nk_kernel_euclideans_packed_k: *m = (m_t)&nk_euclideans_packed_u4_icelake, *c = nk_cap_icelake_k; return;
71
+ case nk_kernel_euclideans_symmetric_k:
72
+ *m = (m_t)&nk_euclideans_symmetric_u4_icelake, *c = nk_cap_icelake_k;
73
+ return;
74
+ default: break;
75
+ }
76
+ #endif
77
+ #if NK_TARGET_SKYLAKE
78
+ if (v & nk_cap_skylake_k) switch (k) {
79
+ case nk_kernel_reduce_moments_k: *m = (m_t)&nk_reduce_moments_u4_skylake, *c = nk_cap_skylake_k; return;
80
+ default: break;
81
+ }
82
+ #endif
83
+ #if NK_TARGET_HASWELL
84
+ if (v & nk_cap_haswell_k) switch (k) {
85
+ case nk_kernel_dot_k: *m = (m_t)&nk_dot_u4_haswell, *c = nk_cap_haswell_k; return;
86
+ case nk_kernel_dots_packed_size_k: *m = (m_t)&nk_dots_packed_size_u4_haswell, *c = nk_cap_haswell_k; return;
87
+ case nk_kernel_dots_pack_k: *m = (m_t)&nk_dots_pack_u4_haswell, *c = nk_cap_haswell_k; return;
88
+ case nk_kernel_dots_packed_k: *m = (m_t)&nk_dots_packed_u4_haswell, *c = nk_cap_haswell_k; return;
89
+ case nk_kernel_dots_symmetric_k: *m = (m_t)&nk_dots_symmetric_u4_haswell, *c = nk_cap_haswell_k; return;
90
+ case nk_kernel_reduce_moments_k: *m = (m_t)&nk_reduce_moments_u4_haswell, *c = nk_cap_haswell_k; return;
91
+ default: break;
92
+ }
93
+ #endif
94
+ #if NK_TARGET_RVV
95
+ if (v & nk_cap_rvv_k) switch (k) {
96
+ case nk_kernel_dot_k: *m = (m_t)&nk_dot_u4_rvv, *c = nk_cap_rvv_k; return;
97
+ case nk_kernel_sqeuclidean_k: *m = (m_t)&nk_sqeuclidean_u4_rvv, *c = nk_cap_rvv_k; return;
98
+ case nk_kernel_euclidean_k: *m = (m_t)&nk_euclidean_u4_rvv, *c = nk_cap_rvv_k; return;
99
+ case nk_kernel_angular_k: *m = (m_t)&nk_angular_u4_rvv, *c = nk_cap_rvv_k; return;
100
+ default: break;
101
+ }
102
+ #endif
103
+ if (v & nk_cap_serial_k) switch (k) {
104
+ case nk_kernel_dot_k: *m = (m_t)&nk_dot_u4_serial, *c = nk_cap_serial_k; return;
105
+ case nk_kernel_angular_k: *m = (m_t)&nk_angular_u4_serial, *c = nk_cap_serial_k; return;
106
+ case nk_kernel_sqeuclidean_k: *m = (m_t)&nk_sqeuclidean_u4_serial, *c = nk_cap_serial_k; return;
107
+ case nk_kernel_euclidean_k: *m = (m_t)&nk_euclidean_u4_serial, *c = nk_cap_serial_k; return;
108
+ case nk_kernel_dots_packed_size_k: *m = (m_t)&nk_dots_packed_size_u4_serial, *c = nk_cap_serial_k; return;
109
+ case nk_kernel_dots_pack_k: *m = (m_t)&nk_dots_pack_u4_serial, *c = nk_cap_serial_k; return;
110
+ case nk_kernel_dots_packed_k: *m = (m_t)&nk_dots_packed_u4_serial, *c = nk_cap_serial_k; return;
111
+ case nk_kernel_dots_symmetric_k: *m = (m_t)&nk_dots_symmetric_u4_serial, *c = nk_cap_serial_k; return;
112
+ case nk_kernel_angulars_packed_k: *m = (m_t)&nk_angulars_packed_u4_serial, *c = nk_cap_serial_k; return;
113
+ case nk_kernel_angulars_symmetric_k: *m = (m_t)&nk_angulars_symmetric_u4_serial, *c = nk_cap_serial_k; return;
114
+ case nk_kernel_euclideans_packed_k: *m = (m_t)&nk_euclideans_packed_u4_serial, *c = nk_cap_serial_k; return;
115
+ case nk_kernel_euclideans_symmetric_k:
116
+ *m = (m_t)&nk_euclideans_symmetric_u4_serial, *c = nk_cap_serial_k;
117
+ return;
118
+ case nk_kernel_reduce_moments_k: *m = (m_t)&nk_reduce_moments_u4_serial, *c = nk_cap_serial_k; return;
119
+ case nk_kernel_reduce_minmax_k: *m = (m_t)&nk_reduce_minmax_u4_serial, *c = nk_cap_serial_k; return;
120
+ default: break;
121
+ }
122
+
123
+ // Error fallback - zero capability signals lookup failure
124
+ *m = (m_t)nk_error_dense_, *c = 0;
125
+ }
126
+
127
+ void nk_dispatch_u4_init_(nk_capability_t caps) {
128
+ nk_implementations_t *t = &nk_dispatch_table;
129
+ nk_capability_t used;
130
+
131
+ nk_dispatch_u4_find_(caps, nk_kernel_dot_k, (nk_kernel_punned_t *)&t->dot_u4, &used);
132
+ nk_dispatch_u4_find_(caps, nk_kernel_angular_k, (nk_kernel_punned_t *)&t->angular_u4, &used);
133
+ nk_dispatch_u4_find_(caps, nk_kernel_euclidean_k, (nk_kernel_punned_t *)&t->euclidean_u4, &used);
134
+ nk_dispatch_u4_find_(caps, nk_kernel_sqeuclidean_k, (nk_kernel_punned_t *)&t->sqeuclidean_u4, &used);
135
+ nk_dispatch_u4_find_(caps, nk_kernel_reduce_moments_k, (nk_kernel_punned_t *)&t->reduce_moments_u4, &used);
136
+ nk_dispatch_u4_find_(caps, nk_kernel_reduce_minmax_k, (nk_kernel_punned_t *)&t->reduce_minmax_u4, &used);
137
+ nk_dispatch_u4_find_(caps, nk_kernel_dots_packed_size_k, (nk_kernel_punned_t *)&t->dots_packed_size_u4, &used);
138
+ nk_dispatch_u4_find_(caps, nk_kernel_dots_pack_k, (nk_kernel_punned_t *)&t->dots_pack_u4, &used);
139
+ nk_dispatch_u4_find_(caps, nk_kernel_dots_packed_k, (nk_kernel_punned_t *)&t->dots_packed_u4, &used);
140
+ nk_dispatch_u4_find_(caps, nk_kernel_dots_symmetric_k, (nk_kernel_punned_t *)&t->dots_symmetric_u4, &used);
141
+ nk_dispatch_u4_find_(caps, nk_kernel_angulars_packed_k, (nk_kernel_punned_t *)&t->angulars_packed_u4, &used);
142
+ nk_dispatch_u4_find_(caps, nk_kernel_angulars_symmetric_k, (nk_kernel_punned_t *)&t->angulars_symmetric_u4, &used);
143
+ nk_dispatch_u4_find_(caps, nk_kernel_euclideans_packed_k, (nk_kernel_punned_t *)&t->euclideans_packed_u4, &used);
144
+ nk_dispatch_u4_find_(caps, nk_kernel_euclideans_symmetric_k, (nk_kernel_punned_t *)&t->euclideans_symmetric_u4,
145
+ &used);
146
+ }
147
+
148
+ #ifdef __cplusplus
149
+ }
150
+ #endif
@@ -0,0 +1,102 @@
1
+ /**
2
+ * @brief Dispatch Initialization for U64 Data Types.
3
+ * @file c/dispatch_u64.c
4
+ * @author Ash Vardanian
5
+ * @date February 3, 2026
6
+ */
7
+ #include "dispatch.h"
8
+
9
/**
 *  @brief Selects the u64 kernel implementation for a single kernel kind.
 *
 *  Backends are probed in source order: V128RELAXED, SVE2, NEON, TURIN, ICELAKE, SKYLAKE,
 *  HASWELL, RVV, and finally serial. Every backend except serial sits inside an
 *  `#if NK_TARGET_*` guard, so it is only compiled when that macro is non-zero; at run time a
 *  backend is only consulted when its `nk_cap_*_k` bit is set in @p v. The first backend whose
 *  `switch` has a case for @p k stores its kernel and returns, so earlier backends take
 *  priority. A `default: break` leaves the `switch` and continues with the next backend.
 *
 *  @param v Capability bitmask tested against the `nk_cap_*_k` bits.
 *  @param k Kernel kind to look up.
 *  @param m Output: receives the selected kernel cast to `nk_kernel_punned_t`, or
 *           `nk_error_dense_` when no backend matches.
 *  @param c Output: receives the capability bit of the selected backend, or 0 when no backend
 *           matches.
 */
+ void nk_dispatch_u64_find_(nk_capability_t v, nk_kernel_kind_t k, nk_kernel_punned_t *m, nk_capability_t *c) {
10
+ typedef nk_kernel_punned_t m_t; // Short alias used by the casts in the case lines below
11
+ #if NK_TARGET_V128RELAXED
12
+ if (v & nk_cap_v128relaxed_k) switch (k) {
13
+ case nk_kernel_reduce_moments_k:
14
+ *m = (m_t)&nk_reduce_moments_u64_v128relaxed, *c = nk_cap_v128relaxed_k;
15
+ return;
16
+ case nk_kernel_reduce_minmax_k: *m = (m_t)&nk_reduce_minmax_u64_v128relaxed, *c = nk_cap_v128relaxed_k; return;
17
+ default: break;
18
+ }
19
+ #endif
20
+ #if NK_TARGET_SVE2
21
+ if (v & nk_cap_sve2_k) switch (k) {
22
+ case nk_kernel_sparse_intersect_k: *m = (m_t)&nk_sparse_intersect_u64_sve2, *c = nk_cap_sve2_k; return;
23
+ default: break;
24
+ }
25
+ #endif
26
+ #if NK_TARGET_NEON
27
+ if (v & nk_cap_neon_k) switch (k) {
28
+ case nk_kernel_sparse_intersect_k: *m = (m_t)&nk_sparse_intersect_u64_neon, *c = nk_cap_neon_k; return;
29
+ case nk_kernel_each_fma_k: *m = (m_t)&nk_each_fma_u64_neon, *c = nk_cap_neon_k; return;
30
+ case nk_kernel_each_scale_k: *m = (m_t)&nk_each_scale_u64_neon, *c = nk_cap_neon_k; return;
31
+ case nk_kernel_each_sum_k: *m = (m_t)&nk_each_sum_u64_neon, *c = nk_cap_neon_k; return;
32
+ case nk_kernel_reduce_moments_k: *m = (m_t)&nk_reduce_moments_u64_neon, *c = nk_cap_neon_k; return;
33
+ case nk_kernel_reduce_minmax_k: *m = (m_t)&nk_reduce_minmax_u64_neon, *c = nk_cap_neon_k; return;
34
+ default: break;
35
+ }
36
+ #endif
37
+ #if NK_TARGET_TURIN
38
+ if (v & nk_cap_turin_k) switch (k) {
39
+ case nk_kernel_sparse_intersect_k: *m = (m_t)&nk_sparse_intersect_u64_turin, *c = nk_cap_turin_k; return;
40
+ default: break;
41
+ }
42
+ #endif
43
+ #if NK_TARGET_ICELAKE
44
+ if (v & nk_cap_icelake_k) switch (k) {
45
+ case nk_kernel_sparse_intersect_k: *m = (m_t)&nk_sparse_intersect_u64_icelake, *c = nk_cap_icelake_k; return;
46
+ case nk_kernel_each_sum_k: *m = (m_t)&nk_each_sum_u64_icelake, *c = nk_cap_icelake_k; return;
47
+ default: break;
48
+ }
49
+ #endif
50
+ #if NK_TARGET_SKYLAKE
51
+ if (v & nk_cap_skylake_k) switch (k) {
52
+ case nk_kernel_each_fma_k: *m = (m_t)&nk_each_fma_u64_skylake, *c = nk_cap_skylake_k; return;
53
+ case nk_kernel_each_scale_k: *m = (m_t)&nk_each_scale_u64_skylake, *c = nk_cap_skylake_k; return;
54
+ case nk_kernel_reduce_moments_k: *m = (m_t)&nk_reduce_moments_u64_skylake, *c = nk_cap_skylake_k; return;
55
+ case nk_kernel_reduce_minmax_k: *m = (m_t)&nk_reduce_minmax_u64_skylake, *c = nk_cap_skylake_k; return;
56
+ default: break;
57
+ }
58
+ #endif
59
+ #if NK_TARGET_HASWELL
60
+ if (v & nk_cap_haswell_k) switch (k) {
61
+ case nk_kernel_reduce_moments_k: *m = (m_t)&nk_reduce_moments_u64_haswell, *c = nk_cap_haswell_k; return;
62
+ case nk_kernel_reduce_minmax_k: *m = (m_t)&nk_reduce_minmax_u64_haswell, *c = nk_cap_haswell_k; return;
63
+ default: break;
64
+ }
65
+ #endif
66
+ #if NK_TARGET_RVV
67
+ if (v & nk_cap_rvv_k) switch (k) {
68
+ case nk_kernel_each_fma_k: *m = (m_t)&nk_each_fma_u64_rvv, *c = nk_cap_rvv_k; return;
69
+ case nk_kernel_each_scale_k: *m = (m_t)&nk_each_scale_u64_rvv, *c = nk_cap_rvv_k; return;
70
+ case nk_kernel_each_sum_k: *m = (m_t)&nk_each_sum_u64_rvv, *c = nk_cap_rvv_k; return;
71
+ case nk_kernel_reduce_moments_k: *m = (m_t)&nk_reduce_moments_u64_rvv, *c = nk_cap_rvv_k; return;
72
+ case nk_kernel_reduce_minmax_k: *m = (m_t)&nk_reduce_minmax_u64_rvv, *c = nk_cap_rvv_k; return;
73
+ default: break;
74
+ }
75
+ #endif
76
+ if (v & nk_cap_serial_k) switch (k) { // Not behind an #if guard; the only backend here with an each_blend case
77
+ case nk_kernel_sparse_intersect_k: *m = (m_t)&nk_sparse_intersect_u64_serial, *c = nk_cap_serial_k; return;
78
+ case nk_kernel_each_fma_k: *m = (m_t)&nk_each_fma_u64_serial, *c = nk_cap_serial_k; return;
79
+ case nk_kernel_each_scale_k: *m = (m_t)&nk_each_scale_u64_serial, *c = nk_cap_serial_k; return;
80
+ case nk_kernel_each_sum_k: *m = (m_t)&nk_each_sum_u64_serial, *c = nk_cap_serial_k; return;
81
+ case nk_kernel_each_blend_k: *m = (m_t)&nk_each_blend_u64_serial, *c = nk_cap_serial_k; return;
82
+ case nk_kernel_reduce_moments_k: *m = (m_t)&nk_reduce_moments_u64_serial, *c = nk_cap_serial_k; return;
83
+ case nk_kernel_reduce_minmax_k: *m = (m_t)&nk_reduce_minmax_u64_serial, *c = nk_cap_serial_k; return;
84
+ default: break;
85
+ }
86
+
87
+ // Error fallback - zero capability signals lookup failure
88
+ *m = (m_t)nk_error_dense_, *c = 0;
89
+ }
90
+
91
/**
 *  @brief Fills the u64 slots of the global dispatch table.
 *
 *  Calls `nk_dispatch_u64_find_` once per kernel kind listed below, passing the address of the
 *  matching `*_u64` field of `nk_dispatch_table` as the output slot. A kind with no matching
 *  backend leaves its slot holding `nk_error_dense_`, which is what the lookup stores on failure.
 *
 *  @param caps Capability bitmask forwarded unchanged to every lookup.
 */
+ void nk_dispatch_u64_init_(nk_capability_t caps) {
92
+ nk_implementations_t *t = &nk_dispatch_table; // Table whose u64 slots are filled in below
93
+ nk_capability_t used; // Overwritten by every lookup below; never read in this function
94
+
95
+ nk_dispatch_u64_find_(caps, nk_kernel_sparse_intersect_k, (nk_kernel_punned_t *)&t->sparse_intersect_u64, &used);
96
+ nk_dispatch_u64_find_(caps, nk_kernel_each_scale_k, (nk_kernel_punned_t *)&t->each_scale_u64, &used);
97
+ nk_dispatch_u64_find_(caps, nk_kernel_each_sum_k, (nk_kernel_punned_t *)&t->each_sum_u64, &used);
98
+ nk_dispatch_u64_find_(caps, nk_kernel_each_blend_k, (nk_kernel_punned_t *)&t->each_blend_u64, &used);
99
+ nk_dispatch_u64_find_(caps, nk_kernel_each_fma_k, (nk_kernel_punned_t *)&t->each_fma_u64, &used);
100
+ nk_dispatch_u64_find_(caps, nk_kernel_reduce_moments_k, (nk_kernel_punned_t *)&t->reduce_moments_u64, &used);
101
+ nk_dispatch_u64_find_(caps, nk_kernel_reduce_minmax_k, (nk_kernel_punned_t *)&t->reduce_minmax_u64, &used);
102
+ }