numkong 7.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (294) hide show
  1. package/LICENSE +201 -0
  2. package/README.md +495 -0
  3. package/binding.gyp +540 -0
  4. package/c/dispatch.h +512 -0
  5. package/c/dispatch_bf16.c +389 -0
  6. package/c/dispatch_bf16c.c +52 -0
  7. package/c/dispatch_e2m3.c +263 -0
  8. package/c/dispatch_e3m2.c +243 -0
  9. package/c/dispatch_e4m3.c +276 -0
  10. package/c/dispatch_e5m2.c +272 -0
  11. package/c/dispatch_f16.c +376 -0
  12. package/c/dispatch_f16c.c +58 -0
  13. package/c/dispatch_f32.c +378 -0
  14. package/c/dispatch_f32c.c +99 -0
  15. package/c/dispatch_f64.c +296 -0
  16. package/c/dispatch_f64c.c +98 -0
  17. package/c/dispatch_i16.c +96 -0
  18. package/c/dispatch_i32.c +89 -0
  19. package/c/dispatch_i4.c +150 -0
  20. package/c/dispatch_i64.c +86 -0
  21. package/c/dispatch_i8.c +289 -0
  22. package/c/dispatch_other.c +330 -0
  23. package/c/dispatch_u1.c +148 -0
  24. package/c/dispatch_u16.c +124 -0
  25. package/c/dispatch_u32.c +118 -0
  26. package/c/dispatch_u4.c +150 -0
  27. package/c/dispatch_u64.c +102 -0
  28. package/c/dispatch_u8.c +303 -0
  29. package/c/numkong.c +950 -0
  30. package/include/README.md +573 -0
  31. package/include/module.modulemap +129 -0
  32. package/include/numkong/attention/sapphireamx.h +1361 -0
  33. package/include/numkong/attention/sme.h +2066 -0
  34. package/include/numkong/attention.h +49 -0
  35. package/include/numkong/capabilities.h +748 -0
  36. package/include/numkong/cast/README.md +262 -0
  37. package/include/numkong/cast/haswell.h +975 -0
  38. package/include/numkong/cast/icelake.h +470 -0
  39. package/include/numkong/cast/neon.h +1192 -0
  40. package/include/numkong/cast/rvv.h +1021 -0
  41. package/include/numkong/cast/sapphire.h +262 -0
  42. package/include/numkong/cast/serial.h +2262 -0
  43. package/include/numkong/cast/skylake.h +856 -0
  44. package/include/numkong/cast/v128relaxed.h +180 -0
  45. package/include/numkong/cast.h +230 -0
  46. package/include/numkong/curved/README.md +223 -0
  47. package/include/numkong/curved/genoa.h +182 -0
  48. package/include/numkong/curved/haswell.h +276 -0
  49. package/include/numkong/curved/neon.h +205 -0
  50. package/include/numkong/curved/neonbfdot.h +212 -0
  51. package/include/numkong/curved/neonhalf.h +212 -0
  52. package/include/numkong/curved/rvv.h +305 -0
  53. package/include/numkong/curved/serial.h +207 -0
  54. package/include/numkong/curved/skylake.h +457 -0
  55. package/include/numkong/curved/smef64.h +506 -0
  56. package/include/numkong/curved.h +517 -0
  57. package/include/numkong/curved.hpp +144 -0
  58. package/include/numkong/dot/README.md +425 -0
  59. package/include/numkong/dot/alder.h +563 -0
  60. package/include/numkong/dot/genoa.h +315 -0
  61. package/include/numkong/dot/haswell.h +1688 -0
  62. package/include/numkong/dot/icelake.h +883 -0
  63. package/include/numkong/dot/neon.h +818 -0
  64. package/include/numkong/dot/neonbfdot.h +244 -0
  65. package/include/numkong/dot/neonfhm.h +360 -0
  66. package/include/numkong/dot/neonhalf.h +198 -0
  67. package/include/numkong/dot/neonsdot.h +508 -0
  68. package/include/numkong/dot/rvv.h +714 -0
  69. package/include/numkong/dot/rvvbb.h +72 -0
  70. package/include/numkong/dot/rvvbf16.h +123 -0
  71. package/include/numkong/dot/rvvhalf.h +129 -0
  72. package/include/numkong/dot/sapphire.h +141 -0
  73. package/include/numkong/dot/serial.h +838 -0
  74. package/include/numkong/dot/sierra.h +405 -0
  75. package/include/numkong/dot/skylake.h +1084 -0
  76. package/include/numkong/dot/sve.h +379 -0
  77. package/include/numkong/dot/svebfdot.h +74 -0
  78. package/include/numkong/dot/svehalf.h +123 -0
  79. package/include/numkong/dot/v128relaxed.h +1258 -0
  80. package/include/numkong/dot.h +1070 -0
  81. package/include/numkong/dot.hpp +94 -0
  82. package/include/numkong/dots/README.md +496 -0
  83. package/include/numkong/dots/alder.h +114 -0
  84. package/include/numkong/dots/genoa.h +94 -0
  85. package/include/numkong/dots/haswell.h +295 -0
  86. package/include/numkong/dots/icelake.h +171 -0
  87. package/include/numkong/dots/neon.h +120 -0
  88. package/include/numkong/dots/neonbfdot.h +58 -0
  89. package/include/numkong/dots/neonfhm.h +94 -0
  90. package/include/numkong/dots/neonhalf.h +57 -0
  91. package/include/numkong/dots/neonsdot.h +108 -0
  92. package/include/numkong/dots/rvv.h +2486 -0
  93. package/include/numkong/dots/sapphireamx.h +3973 -0
  94. package/include/numkong/dots/serial.h +2844 -0
  95. package/include/numkong/dots/sierra.h +97 -0
  96. package/include/numkong/dots/skylake.h +196 -0
  97. package/include/numkong/dots/sme.h +5372 -0
  98. package/include/numkong/dots/smebi32.h +461 -0
  99. package/include/numkong/dots/smef64.h +1318 -0
  100. package/include/numkong/dots/smehalf.h +47 -0
  101. package/include/numkong/dots/v128relaxed.h +294 -0
  102. package/include/numkong/dots.h +2804 -0
  103. package/include/numkong/dots.hpp +639 -0
  104. package/include/numkong/each/README.md +469 -0
  105. package/include/numkong/each/haswell.h +1658 -0
  106. package/include/numkong/each/icelake.h +272 -0
  107. package/include/numkong/each/neon.h +1104 -0
  108. package/include/numkong/each/neonbfdot.h +212 -0
  109. package/include/numkong/each/neonhalf.h +410 -0
  110. package/include/numkong/each/rvv.h +1121 -0
  111. package/include/numkong/each/sapphire.h +477 -0
  112. package/include/numkong/each/serial.h +260 -0
  113. package/include/numkong/each/skylake.h +1562 -0
  114. package/include/numkong/each.h +2146 -0
  115. package/include/numkong/each.hpp +434 -0
  116. package/include/numkong/geospatial/README.md +147 -0
  117. package/include/numkong/geospatial/haswell.h +593 -0
  118. package/include/numkong/geospatial/neon.h +571 -0
  119. package/include/numkong/geospatial/rvv.h +701 -0
  120. package/include/numkong/geospatial/serial.h +309 -0
  121. package/include/numkong/geospatial/skylake.h +577 -0
  122. package/include/numkong/geospatial/v128relaxed.h +613 -0
  123. package/include/numkong/geospatial.h +453 -0
  124. package/include/numkong/geospatial.hpp +235 -0
  125. package/include/numkong/matrix.hpp +336 -0
  126. package/include/numkong/maxsim/README.md +187 -0
  127. package/include/numkong/maxsim/alder.h +511 -0
  128. package/include/numkong/maxsim/genoa.h +115 -0
  129. package/include/numkong/maxsim/haswell.h +553 -0
  130. package/include/numkong/maxsim/icelake.h +480 -0
  131. package/include/numkong/maxsim/neonsdot.h +394 -0
  132. package/include/numkong/maxsim/sapphireamx.h +877 -0
  133. package/include/numkong/maxsim/serial.h +490 -0
  134. package/include/numkong/maxsim/sme.h +929 -0
  135. package/include/numkong/maxsim/v128relaxed.h +280 -0
  136. package/include/numkong/maxsim.h +571 -0
  137. package/include/numkong/maxsim.hpp +133 -0
  138. package/include/numkong/mesh/README.md +227 -0
  139. package/include/numkong/mesh/haswell.h +2235 -0
  140. package/include/numkong/mesh/neon.h +1329 -0
  141. package/include/numkong/mesh/neonbfdot.h +842 -0
  142. package/include/numkong/mesh/neonhalf.h +616 -0
  143. package/include/numkong/mesh/rvv.h +916 -0
  144. package/include/numkong/mesh/serial.h +742 -0
  145. package/include/numkong/mesh/skylake.h +1135 -0
  146. package/include/numkong/mesh/v128relaxed.h +1052 -0
  147. package/include/numkong/mesh.h +652 -0
  148. package/include/numkong/mesh.hpp +762 -0
  149. package/include/numkong/numkong.h +78 -0
  150. package/include/numkong/numkong.hpp +57 -0
  151. package/include/numkong/probability/README.md +173 -0
  152. package/include/numkong/probability/haswell.h +267 -0
  153. package/include/numkong/probability/neon.h +225 -0
  154. package/include/numkong/probability/rvv.h +409 -0
  155. package/include/numkong/probability/serial.h +169 -0
  156. package/include/numkong/probability/skylake.h +324 -0
  157. package/include/numkong/probability.h +383 -0
  158. package/include/numkong/probability.hpp +120 -0
  159. package/include/numkong/random.h +50 -0
  160. package/include/numkong/random.hpp +285 -0
  161. package/include/numkong/reduce/README.md +547 -0
  162. package/include/numkong/reduce/alder.h +632 -0
  163. package/include/numkong/reduce/genoa.h +201 -0
  164. package/include/numkong/reduce/haswell.h +3783 -0
  165. package/include/numkong/reduce/icelake.h +549 -0
  166. package/include/numkong/reduce/neon.h +3841 -0
  167. package/include/numkong/reduce/neonbfdot.h +353 -0
  168. package/include/numkong/reduce/neonfhm.h +665 -0
  169. package/include/numkong/reduce/neonhalf.h +157 -0
  170. package/include/numkong/reduce/neonsdot.h +357 -0
  171. package/include/numkong/reduce/rvv.h +3407 -0
  172. package/include/numkong/reduce/serial.h +757 -0
  173. package/include/numkong/reduce/sierra.h +338 -0
  174. package/include/numkong/reduce/skylake.h +3792 -0
  175. package/include/numkong/reduce/v128relaxed.h +2302 -0
  176. package/include/numkong/reduce.h +1597 -0
  177. package/include/numkong/reduce.hpp +633 -0
  178. package/include/numkong/scalar/README.md +89 -0
  179. package/include/numkong/scalar/haswell.h +113 -0
  180. package/include/numkong/scalar/neon.h +122 -0
  181. package/include/numkong/scalar/neonhalf.h +70 -0
  182. package/include/numkong/scalar/rvv.h +211 -0
  183. package/include/numkong/scalar/sapphire.h +63 -0
  184. package/include/numkong/scalar/serial.h +332 -0
  185. package/include/numkong/scalar/v128relaxed.h +56 -0
  186. package/include/numkong/scalar.h +683 -0
  187. package/include/numkong/set/README.md +179 -0
  188. package/include/numkong/set/haswell.h +334 -0
  189. package/include/numkong/set/icelake.h +485 -0
  190. package/include/numkong/set/neon.h +364 -0
  191. package/include/numkong/set/rvv.h +226 -0
  192. package/include/numkong/set/rvvbb.h +117 -0
  193. package/include/numkong/set/serial.h +174 -0
  194. package/include/numkong/set/sve.h +185 -0
  195. package/include/numkong/set/v128relaxed.h +240 -0
  196. package/include/numkong/set.h +457 -0
  197. package/include/numkong/set.hpp +114 -0
  198. package/include/numkong/sets/README.md +149 -0
  199. package/include/numkong/sets/haswell.h +63 -0
  200. package/include/numkong/sets/icelake.h +66 -0
  201. package/include/numkong/sets/neon.h +61 -0
  202. package/include/numkong/sets/serial.h +43 -0
  203. package/include/numkong/sets/smebi32.h +1099 -0
  204. package/include/numkong/sets/v128relaxed.h +58 -0
  205. package/include/numkong/sets.h +339 -0
  206. package/include/numkong/sparse/README.md +156 -0
  207. package/include/numkong/sparse/icelake.h +463 -0
  208. package/include/numkong/sparse/neon.h +288 -0
  209. package/include/numkong/sparse/serial.h +117 -0
  210. package/include/numkong/sparse/sve2.h +507 -0
  211. package/include/numkong/sparse/turin.h +322 -0
  212. package/include/numkong/sparse.h +363 -0
  213. package/include/numkong/sparse.hpp +113 -0
  214. package/include/numkong/spatial/README.md +435 -0
  215. package/include/numkong/spatial/alder.h +607 -0
  216. package/include/numkong/spatial/genoa.h +290 -0
  217. package/include/numkong/spatial/haswell.h +960 -0
  218. package/include/numkong/spatial/icelake.h +586 -0
  219. package/include/numkong/spatial/neon.h +773 -0
  220. package/include/numkong/spatial/neonbfdot.h +165 -0
  221. package/include/numkong/spatial/neonhalf.h +118 -0
  222. package/include/numkong/spatial/neonsdot.h +261 -0
  223. package/include/numkong/spatial/rvv.h +984 -0
  224. package/include/numkong/spatial/rvvbf16.h +123 -0
  225. package/include/numkong/spatial/rvvhalf.h +117 -0
  226. package/include/numkong/spatial/sapphire.h +343 -0
  227. package/include/numkong/spatial/serial.h +346 -0
  228. package/include/numkong/spatial/sierra.h +323 -0
  229. package/include/numkong/spatial/skylake.h +606 -0
  230. package/include/numkong/spatial/sve.h +224 -0
  231. package/include/numkong/spatial/svebfdot.h +122 -0
  232. package/include/numkong/spatial/svehalf.h +109 -0
  233. package/include/numkong/spatial/v128relaxed.h +717 -0
  234. package/include/numkong/spatial.h +1425 -0
  235. package/include/numkong/spatial.hpp +183 -0
  236. package/include/numkong/spatials/README.md +580 -0
  237. package/include/numkong/spatials/alder.h +94 -0
  238. package/include/numkong/spatials/genoa.h +94 -0
  239. package/include/numkong/spatials/haswell.h +219 -0
  240. package/include/numkong/spatials/icelake.h +113 -0
  241. package/include/numkong/spatials/neon.h +109 -0
  242. package/include/numkong/spatials/neonbfdot.h +60 -0
  243. package/include/numkong/spatials/neonfhm.h +92 -0
  244. package/include/numkong/spatials/neonhalf.h +58 -0
  245. package/include/numkong/spatials/neonsdot.h +109 -0
  246. package/include/numkong/spatials/rvv.h +1960 -0
  247. package/include/numkong/spatials/sapphireamx.h +1149 -0
  248. package/include/numkong/spatials/serial.h +226 -0
  249. package/include/numkong/spatials/sierra.h +96 -0
  250. package/include/numkong/spatials/skylake.h +184 -0
  251. package/include/numkong/spatials/sme.h +1901 -0
  252. package/include/numkong/spatials/smef64.h +465 -0
  253. package/include/numkong/spatials/v128relaxed.h +240 -0
  254. package/include/numkong/spatials.h +3021 -0
  255. package/include/numkong/spatials.hpp +508 -0
  256. package/include/numkong/tensor.hpp +1592 -0
  257. package/include/numkong/trigonometry/README.md +184 -0
  258. package/include/numkong/trigonometry/haswell.h +652 -0
  259. package/include/numkong/trigonometry/neon.h +639 -0
  260. package/include/numkong/trigonometry/rvv.h +699 -0
  261. package/include/numkong/trigonometry/serial.h +703 -0
  262. package/include/numkong/trigonometry/skylake.h +721 -0
  263. package/include/numkong/trigonometry/v128relaxed.h +666 -0
  264. package/include/numkong/trigonometry.h +467 -0
  265. package/include/numkong/trigonometry.hpp +166 -0
  266. package/include/numkong/types.h +1384 -0
  267. package/include/numkong/types.hpp +5603 -0
  268. package/include/numkong/vector.hpp +698 -0
  269. package/javascript/README.md +246 -0
  270. package/javascript/dist/cjs/numkong-wasm.d.ts +166 -0
  271. package/javascript/dist/cjs/numkong-wasm.js +617 -0
  272. package/javascript/dist/cjs/numkong.d.ts +343 -0
  273. package/javascript/dist/cjs/numkong.js +523 -0
  274. package/javascript/dist/cjs/package.json +3 -0
  275. package/javascript/dist/cjs/types.d.ts +284 -0
  276. package/javascript/dist/cjs/types.js +653 -0
  277. package/javascript/dist/esm/numkong-wasm.d.ts +166 -0
  278. package/javascript/dist/esm/numkong-wasm.js +595 -0
  279. package/javascript/dist/esm/numkong.d.ts +343 -0
  280. package/javascript/dist/esm/numkong.js +452 -0
  281. package/javascript/dist/esm/package.json +3 -0
  282. package/javascript/dist/esm/types.d.ts +284 -0
  283. package/javascript/dist/esm/types.js +630 -0
  284. package/javascript/dist-package-cjs.json +3 -0
  285. package/javascript/dist-package-esm.json +3 -0
  286. package/javascript/node-gyp-build.d.ts +1 -0
  287. package/javascript/numkong-wasm.ts +756 -0
  288. package/javascript/numkong.c +689 -0
  289. package/javascript/numkong.ts +575 -0
  290. package/javascript/tsconfig-base.json +39 -0
  291. package/javascript/tsconfig-cjs.json +8 -0
  292. package/javascript/tsconfig-esm.json +8 -0
  293. package/javascript/types.ts +674 -0
  294. package/package.json +87 -0
@@ -0,0 +1,263 @@
1
+ /**
2
+ * @brief Dispatch Initialization for E2M3 Data Types.
3
+ * @file c/dispatch_e2m3.c
4
+ * @author Ash Vardanian
5
+ * @date February 3, 2026
6
+ */
7
+ #include "dispatch.h"
8
+
9
+ #ifdef __cplusplus
10
+ extern "C" {
11
+ #endif
12
+
13
+ void nk_dispatch_e2m3_find_(nk_capability_t v, nk_kernel_kind_t k, nk_kernel_punned_t *m, nk_capability_t *c) { // Looks up the E2M3 kernel for kind `k` among the capabilities in `v`; stores its address in `*m` and the supplying backend's capability bit in `*c`.
14
+ typedef nk_kernel_punned_t m_t; // Short alias used by the casts below.
15
+ #if NK_TARGET_V128RELAXED // Backends are tried in the order written; the first one whose bit is set in `v` and that has a case for `k` returns immediately.
16
+ if (v & nk_cap_v128relaxed_k) switch (k) {
17
+ case nk_kernel_dot_k: *m = (m_t)&nk_dot_e2m3_v128relaxed, *c = nk_cap_v128relaxed_k; return;
18
+ case nk_kernel_reduce_moments_k:
19
+ *m = (m_t)&nk_reduce_moments_e2m3_v128relaxed, *c = nk_cap_v128relaxed_k;
20
+ return;
21
+ case nk_kernel_reduce_minmax_k: *m = (m_t)&nk_reduce_minmax_e2m3_v128relaxed, *c = nk_cap_v128relaxed_k; return;
22
+ case nk_kernel_dots_packed_size_k:
23
+ *m = (m_t)&nk_dots_packed_size_e2m3_v128relaxed, *c = nk_cap_v128relaxed_k;
24
+ return;
25
+ case nk_kernel_dots_pack_k: *m = (m_t)&nk_dots_pack_e2m3_v128relaxed, *c = nk_cap_v128relaxed_k; return;
26
+ case nk_kernel_dots_packed_k: *m = (m_t)&nk_dots_packed_e2m3_v128relaxed, *c = nk_cap_v128relaxed_k; return;
27
+ case nk_kernel_dots_symmetric_k:
28
+ *m = (m_t)&nk_dots_symmetric_e2m3_v128relaxed, *c = nk_cap_v128relaxed_k;
29
+ return;
30
+ case nk_kernel_angulars_packed_k:
31
+ *m = (m_t)&nk_angulars_packed_e2m3_v128relaxed, *c = nk_cap_v128relaxed_k;
32
+ return;
33
+ case nk_kernel_angulars_symmetric_k:
34
+ *m = (m_t)&nk_angulars_symmetric_e2m3_v128relaxed, *c = nk_cap_v128relaxed_k;
35
+ return;
36
+ case nk_kernel_euclideans_packed_k:
37
+ *m = (m_t)&nk_euclideans_packed_e2m3_v128relaxed, *c = nk_cap_v128relaxed_k;
38
+ return;
39
+ case nk_kernel_euclideans_symmetric_k:
40
+ *m = (m_t)&nk_euclideans_symmetric_e2m3_v128relaxed, *c = nk_cap_v128relaxed_k;
41
+ return;
42
+ default: break; // This backend has no case for `k`: leave the switch and continue with the next backend check.
43
+ }
44
+ #endif
45
+ #if NK_TARGET_SME
46
+ if (v & nk_cap_sme_k) switch (k) {
47
+ case nk_kernel_dots_packed_size_k: *m = (m_t)&nk_dots_packed_size_e2m3_sme, *c = nk_cap_sme_k; return;
48
+ case nk_kernel_dots_pack_k: *m = (m_t)&nk_dots_pack_e2m3_sme, *c = nk_cap_sme_k; return;
49
+ case nk_kernel_dots_packed_k: *m = (m_t)&nk_dots_packed_e2m3_sme, *c = nk_cap_sme_k; return;
50
+ case nk_kernel_dots_symmetric_k: *m = (m_t)&nk_dots_symmetric_e2m3_sme, *c = nk_cap_sme_k; return;
51
+ case nk_kernel_angulars_packed_k: *m = (m_t)&nk_angulars_packed_e2m3_sme, *c = nk_cap_sme_k; return;
52
+ case nk_kernel_angulars_symmetric_k: *m = (m_t)&nk_angulars_symmetric_e2m3_sme, *c = nk_cap_sme_k; return;
53
+ case nk_kernel_euclideans_packed_k: *m = (m_t)&nk_euclideans_packed_e2m3_sme, *c = nk_cap_sme_k; return;
54
+ case nk_kernel_euclideans_symmetric_k: *m = (m_t)&nk_euclideans_symmetric_e2m3_sme, *c = nk_cap_sme_k; return;
55
+ default: break;
56
+ }
57
+ #endif
58
+ #if NK_TARGET_NEONSDOT
59
+ if (v & nk_cap_neonsdot_k) switch (k) {
60
+ case nk_kernel_dot_k: *m = (m_t)&nk_dot_e2m3_neonsdot, *c = nk_cap_neonsdot_k; return;
61
+ case nk_kernel_reduce_moments_k: *m = (m_t)&nk_reduce_moments_e2m3_neonsdot, *c = nk_cap_neonsdot_k; return;
62
+ default: break;
63
+ }
64
+ #endif
65
+ #if NK_TARGET_NEON
66
+ if (v & nk_cap_neon_k) switch (k) {
67
+ case nk_kernel_dot_k: *m = (m_t)&nk_dot_e2m3_neon, *c = nk_cap_neon_k; return;
68
+ case nk_kernel_euclidean_k: *m = (m_t)&nk_euclidean_e2m3_neon, *c = nk_cap_neon_k; return;
69
+ case nk_kernel_sqeuclidean_k: *m = (m_t)&nk_sqeuclidean_e2m3_neon, *c = nk_cap_neon_k; return;
70
+ case nk_kernel_angular_k: *m = (m_t)&nk_angular_e2m3_neon, *c = nk_cap_neon_k; return;
71
+ case nk_kernel_reduce_moments_k: *m = (m_t)&nk_reduce_moments_e2m3_neon, *c = nk_cap_neon_k; return;
72
+ case nk_kernel_reduce_minmax_k: *m = (m_t)&nk_reduce_minmax_e2m3_neon, *c = nk_cap_neon_k; return;
73
+ default: break;
74
+ }
75
+ #endif
76
+ #if NK_TARGET_SAPPHIREAMX
77
+ if (v & nk_cap_sapphireamx_k) switch (k) {
78
+ case nk_kernel_dots_packed_size_k:
79
+ *m = (m_t)&nk_dots_packed_size_e2m3_sapphireamx, *c = nk_cap_sapphireamx_k;
80
+ return;
81
+ case nk_kernel_dots_pack_k: *m = (m_t)&nk_dots_pack_e2m3_sapphireamx, *c = nk_cap_sapphireamx_k; return;
82
+ case nk_kernel_dots_packed_k: *m = (m_t)&nk_dots_packed_e2m3_sapphireamx, *c = nk_cap_sapphireamx_k; return;
83
+ case nk_kernel_dots_symmetric_k:
84
+ *m = (m_t)&nk_dots_symmetric_e2m3_sapphireamx, *c = nk_cap_sapphireamx_k;
85
+ return;
86
+ case nk_kernel_angulars_packed_k:
87
+ *m = (m_t)&nk_angulars_packed_e2m3_sapphireamx, *c = nk_cap_sapphireamx_k;
88
+ return;
89
+ case nk_kernel_angulars_symmetric_k:
90
+ *m = (m_t)&nk_angulars_symmetric_e2m3_sapphireamx, *c = nk_cap_sapphireamx_k;
91
+ return;
92
+ case nk_kernel_euclideans_packed_k:
93
+ *m = (m_t)&nk_euclideans_packed_e2m3_sapphireamx, *c = nk_cap_sapphireamx_k;
94
+ return;
95
+ case nk_kernel_euclideans_symmetric_k:
96
+ *m = (m_t)&nk_euclideans_symmetric_e2m3_sapphireamx, *c = nk_cap_sapphireamx_k;
97
+ return;
98
+ default: break;
99
+ }
100
+ #endif
101
+ #if NK_TARGET_SAPPHIRE
102
+ if (v & nk_cap_sapphire_k) switch (k) {
103
+ case nk_kernel_euclidean_k: *m = (m_t)&nk_euclidean_e2m3_sapphire, *c = nk_cap_sapphire_k; return;
104
+ case nk_kernel_sqeuclidean_k: *m = (m_t)&nk_sqeuclidean_e2m3_sapphire, *c = nk_cap_sapphire_k; return;
105
+ case nk_kernel_angular_k: *m = (m_t)&nk_angular_e2m3_sapphire, *c = nk_cap_sapphire_k; return;
106
+ default: break;
107
+ }
108
+ #endif
109
+ #if NK_TARGET_ICELAKE
110
+ if (v & nk_cap_icelake_k) switch (k) {
111
+ case nk_kernel_dot_k: *m = (m_t)&nk_dot_e2m3_icelake, *c = nk_cap_icelake_k; return;
112
+ case nk_kernel_reduce_moments_k: *m = (m_t)&nk_reduce_moments_e2m3_icelake, *c = nk_cap_icelake_k; return;
113
+ default: break;
114
+ }
115
+ #endif
116
+ #if NK_TARGET_SKYLAKE
117
+ if (v & nk_cap_skylake_k) switch (k) {
118
+ case nk_kernel_euclidean_k: *m = (m_t)&nk_euclidean_e2m3_skylake, *c = nk_cap_skylake_k; return;
119
+ case nk_kernel_sqeuclidean_k: *m = (m_t)&nk_sqeuclidean_e2m3_skylake, *c = nk_cap_skylake_k; return;
120
+ case nk_kernel_angular_k: *m = (m_t)&nk_angular_e2m3_skylake, *c = nk_cap_skylake_k; return;
121
+ case nk_kernel_dot_k: *m = (m_t)&nk_dot_e2m3_skylake, *c = nk_cap_skylake_k; return;
122
+ case nk_kernel_dots_packed_size_k: *m = (m_t)&nk_dots_packed_size_e2m3_skylake, *c = nk_cap_skylake_k; return;
123
+ case nk_kernel_dots_pack_k: *m = (m_t)&nk_dots_pack_e2m3_skylake, *c = nk_cap_skylake_k; return;
124
+ case nk_kernel_dots_packed_k: *m = (m_t)&nk_dots_packed_e2m3_skylake, *c = nk_cap_skylake_k; return;
125
+ case nk_kernel_dots_symmetric_k: *m = (m_t)&nk_dots_symmetric_e2m3_skylake, *c = nk_cap_skylake_k; return;
126
+ case nk_kernel_angulars_packed_k: *m = (m_t)&nk_angulars_packed_e2m3_skylake, *c = nk_cap_skylake_k; return;
127
+ case nk_kernel_angulars_symmetric_k:
128
+ *m = (m_t)&nk_angulars_symmetric_e2m3_skylake, *c = nk_cap_skylake_k;
129
+ return;
130
+ case nk_kernel_euclideans_packed_k: *m = (m_t)&nk_euclideans_packed_e2m3_skylake, *c = nk_cap_skylake_k; return;
131
+ case nk_kernel_euclideans_symmetric_k:
132
+ *m = (m_t)&nk_euclideans_symmetric_e2m3_skylake, *c = nk_cap_skylake_k;
133
+ return;
134
+ case nk_kernel_reduce_moments_k: *m = (m_t)&nk_reduce_moments_e2m3_skylake, *c = nk_cap_skylake_k; return;
135
+ case nk_kernel_reduce_minmax_k: *m = (m_t)&nk_reduce_minmax_e2m3_skylake, *c = nk_cap_skylake_k; return;
136
+ default: break;
137
+ }
138
+ #endif
139
+ #if NK_TARGET_SIERRA
140
+ if (v & nk_cap_sierra_k) switch (k) {
141
+ case nk_kernel_dot_k: *m = (m_t)&nk_dot_e2m3_sierra, *c = nk_cap_sierra_k; return;
142
+ case nk_kernel_angular_k: *m = (m_t)&nk_angular_e2m3_sierra, *c = nk_cap_sierra_k; return;
143
+ case nk_kernel_euclidean_k: *m = (m_t)&nk_euclidean_e2m3_sierra, *c = nk_cap_sierra_k; return;
144
+ case nk_kernel_sqeuclidean_k: *m = (m_t)&nk_sqeuclidean_e2m3_sierra, *c = nk_cap_sierra_k; return;
145
+ case nk_kernel_dots_packed_size_k: *m = (m_t)&nk_dots_packed_size_e2m3_sierra, *c = nk_cap_sierra_k; return;
146
+ case nk_kernel_dots_pack_k: *m = (m_t)&nk_dots_pack_e2m3_sierra, *c = nk_cap_sierra_k; return;
147
+ case nk_kernel_dots_packed_k: *m = (m_t)&nk_dots_packed_e2m3_sierra, *c = nk_cap_sierra_k; return;
148
+ case nk_kernel_dots_symmetric_k: *m = (m_t)&nk_dots_symmetric_e2m3_sierra, *c = nk_cap_sierra_k; return;
149
+ case nk_kernel_reduce_moments_k: *m = (m_t)&nk_reduce_moments_e2m3_sierra, *c = nk_cap_sierra_k; return;
150
+ default: break;
151
+ }
152
+ #endif
153
+ #if NK_TARGET_ALDER
154
+ if (v & nk_cap_alder_k) switch (k) {
155
+ case nk_kernel_dot_k: *m = (m_t)&nk_dot_e2m3_alder, *c = nk_cap_alder_k; return;
156
+ case nk_kernel_angular_k: *m = (m_t)&nk_angular_e2m3_alder, *c = nk_cap_alder_k; return;
157
+ case nk_kernel_euclidean_k: *m = (m_t)&nk_euclidean_e2m3_alder, *c = nk_cap_alder_k; return;
158
+ case nk_kernel_sqeuclidean_k: *m = (m_t)&nk_sqeuclidean_e2m3_alder, *c = nk_cap_alder_k; return;
159
+ case nk_kernel_reduce_moments_k: *m = (m_t)&nk_reduce_moments_e2m3_alder, *c = nk_cap_alder_k; return;
160
+ case nk_kernel_dots_packed_size_k: *m = (m_t)&nk_dots_packed_size_e2m3_alder, *c = nk_cap_alder_k; return;
161
+ case nk_kernel_dots_pack_k: *m = (m_t)&nk_dots_pack_e2m3_alder, *c = nk_cap_alder_k; return;
162
+ case nk_kernel_dots_packed_k: *m = (m_t)&nk_dots_packed_e2m3_alder, *c = nk_cap_alder_k; return;
163
+ case nk_kernel_dots_symmetric_k: *m = (m_t)&nk_dots_symmetric_e2m3_alder, *c = nk_cap_alder_k; return;
164
+ default: break;
165
+ }
166
+ #endif
167
+ #if NK_TARGET_HASWELL
168
+ if (v & nk_cap_haswell_k) switch (k) {
169
+ case nk_kernel_dot_k: *m = (m_t)&nk_dot_e2m3_haswell, *c = nk_cap_haswell_k; return;
170
+ case nk_kernel_euclidean_k: *m = (m_t)&nk_euclidean_e2m3_haswell, *c = nk_cap_haswell_k; return;
171
+ case nk_kernel_sqeuclidean_k: *m = (m_t)&nk_sqeuclidean_e2m3_haswell, *c = nk_cap_haswell_k; return;
172
+ case nk_kernel_angular_k: *m = (m_t)&nk_angular_e2m3_haswell, *c = nk_cap_haswell_k; return;
173
+ case nk_kernel_dots_packed_size_k: *m = (m_t)&nk_dots_packed_size_e2m3_haswell, *c = nk_cap_haswell_k; return;
174
+ case nk_kernel_dots_pack_k: *m = (m_t)&nk_dots_pack_e2m3_haswell, *c = nk_cap_haswell_k; return;
175
+ case nk_kernel_dots_packed_k: *m = (m_t)&nk_dots_packed_e2m3_haswell, *c = nk_cap_haswell_k; return;
176
+ case nk_kernel_dots_symmetric_k: *m = (m_t)&nk_dots_symmetric_e2m3_haswell, *c = nk_cap_haswell_k; return;
177
+ case nk_kernel_angulars_packed_k: *m = (m_t)&nk_angulars_packed_e2m3_haswell, *c = nk_cap_haswell_k; return;
178
+ case nk_kernel_angulars_symmetric_k:
179
+ *m = (m_t)&nk_angulars_symmetric_e2m3_haswell, *c = nk_cap_haswell_k;
180
+ return;
181
+ case nk_kernel_euclideans_packed_k: *m = (m_t)&nk_euclideans_packed_e2m3_haswell, *c = nk_cap_haswell_k; return;
182
+ case nk_kernel_euclideans_symmetric_k:
183
+ *m = (m_t)&nk_euclideans_symmetric_e2m3_haswell, *c = nk_cap_haswell_k;
184
+ return;
185
+ case nk_kernel_reduce_moments_k: *m = (m_t)&nk_reduce_moments_e2m3_haswell, *c = nk_cap_haswell_k; return;
186
+ case nk_kernel_reduce_minmax_k: *m = (m_t)&nk_reduce_minmax_e2m3_haswell, *c = nk_cap_haswell_k; return;
187
+ default: break;
188
+ }
189
+ #endif
190
+ #if NK_TARGET_RVV
191
+ if (v & nk_cap_rvv_k) switch (k) {
192
+ case nk_kernel_dot_k: *m = (m_t)&nk_dot_e2m3_rvv, *c = nk_cap_rvv_k; return;
193
+ case nk_kernel_dots_packed_size_k: *m = (m_t)&nk_dots_packed_size_e2m3_rvv, *c = nk_cap_rvv_k; return;
194
+ case nk_kernel_dots_pack_k: *m = (m_t)&nk_dots_pack_e2m3_rvv, *c = nk_cap_rvv_k; return;
195
+ case nk_kernel_dots_packed_k: *m = (m_t)&nk_dots_packed_e2m3_rvv, *c = nk_cap_rvv_k; return;
196
+ case nk_kernel_dots_symmetric_k: *m = (m_t)&nk_dots_symmetric_e2m3_rvv, *c = nk_cap_rvv_k; return;
197
+ case nk_kernel_angulars_packed_k: *m = (m_t)&nk_angulars_packed_e2m3_rvv, *c = nk_cap_rvv_k; return;
198
+ case nk_kernel_angulars_symmetric_k: *m = (m_t)&nk_angulars_symmetric_e2m3_rvv, *c = nk_cap_rvv_k; return;
199
+ case nk_kernel_euclideans_packed_k: *m = (m_t)&nk_euclideans_packed_e2m3_rvv, *c = nk_cap_rvv_k; return;
200
+ case nk_kernel_euclideans_symmetric_k: *m = (m_t)&nk_euclideans_symmetric_e2m3_rvv, *c = nk_cap_rvv_k; return;
201
+ case nk_kernel_reduce_moments_k: *m = (m_t)&nk_reduce_moments_e2m3_rvv, *c = nk_cap_rvv_k; return;
202
+ case nk_kernel_reduce_minmax_k: *m = (m_t)&nk_reduce_minmax_e2m3_rvv, *c = nk_cap_rvv_k; return;
203
+ default: break;
204
+ }
205
+ #endif
206
+ if (v & nk_cap_serial_k) switch (k) { // No NK_TARGET_* guard here, so this section is always compiled; it is the only section in this function with each_* cases.
207
+ case nk_kernel_dot_k: *m = (m_t)&nk_dot_e2m3_serial, *c = nk_cap_serial_k; return;
208
+ case nk_kernel_euclidean_k: *m = (m_t)&nk_euclidean_e2m3_serial, *c = nk_cap_serial_k; return;
209
+ case nk_kernel_sqeuclidean_k: *m = (m_t)&nk_sqeuclidean_e2m3_serial, *c = nk_cap_serial_k; return;
210
+ case nk_kernel_angular_k: *m = (m_t)&nk_angular_e2m3_serial, *c = nk_cap_serial_k; return;
211
+ case nk_kernel_each_sum_k: *m = (m_t)&nk_each_sum_e2m3_serial, *c = nk_cap_serial_k; return;
212
+ case nk_kernel_each_scale_k: *m = (m_t)&nk_each_scale_e2m3_serial, *c = nk_cap_serial_k; return;
213
+ case nk_kernel_each_blend_k: *m = (m_t)&nk_each_blend_e2m3_serial, *c = nk_cap_serial_k; return;
214
+ case nk_kernel_each_fma_k: *m = (m_t)&nk_each_fma_e2m3_serial, *c = nk_cap_serial_k; return;
215
+ case nk_kernel_dots_packed_size_k: *m = (m_t)&nk_dots_packed_size_e2m3_serial, *c = nk_cap_serial_k; return;
216
+ case nk_kernel_dots_pack_k: *m = (m_t)&nk_dots_pack_e2m3_serial, *c = nk_cap_serial_k; return;
217
+ case nk_kernel_dots_packed_k: *m = (m_t)&nk_dots_packed_e2m3_serial, *c = nk_cap_serial_k; return;
218
+ case nk_kernel_dots_symmetric_k: *m = (m_t)&nk_dots_symmetric_e2m3_serial, *c = nk_cap_serial_k; return;
219
+ case nk_kernel_angulars_packed_k: *m = (m_t)&nk_angulars_packed_e2m3_serial, *c = nk_cap_serial_k; return;
220
+ case nk_kernel_angulars_symmetric_k: *m = (m_t)&nk_angulars_symmetric_e2m3_serial, *c = nk_cap_serial_k; return;
221
+ case nk_kernel_euclideans_packed_k: *m = (m_t)&nk_euclideans_packed_e2m3_serial, *c = nk_cap_serial_k; return;
222
+ case nk_kernel_euclideans_symmetric_k:
223
+ *m = (m_t)&nk_euclideans_symmetric_e2m3_serial, *c = nk_cap_serial_k;
224
+ return;
225
+ case nk_kernel_reduce_moments_k: *m = (m_t)&nk_reduce_moments_e2m3_serial, *c = nk_cap_serial_k; return;
226
+ case nk_kernel_reduce_minmax_k: *m = (m_t)&nk_reduce_minmax_e2m3_serial, *c = nk_cap_serial_k; return;
227
+ default: break;
228
+ }
229
+
230
+ // Error fallback - zero capability signals lookup failure
231
+ *m = (m_t)nk_error_dense_, *c = 0; // Reached only when no section above returned, i.e. no enabled backend has a case for `k`.
232
+ }
233
+
234
+ void nk_dispatch_e2m3_init_(nk_capability_t caps) { // Fills the E2M3 entries of nk_dispatch_table by running one nk_dispatch_e2m3_find_ lookup per kernel kind against `caps`.
235
+ nk_implementations_t *t = &nk_dispatch_table;
236
+ nk_capability_t used; // Receives the capability reported by each lookup; overwritten by every call and never read in this function.
237
+
238
+ nk_dispatch_e2m3_find_(caps, nk_kernel_dot_k, (nk_kernel_punned_t *)&t->dot_e2m3, &used);
239
+ nk_dispatch_e2m3_find_(caps, nk_kernel_angular_k, (nk_kernel_punned_t *)&t->angular_e2m3, &used);
240
+ nk_dispatch_e2m3_find_(caps, nk_kernel_euclidean_k, (nk_kernel_punned_t *)&t->euclidean_e2m3, &used);
241
+ nk_dispatch_e2m3_find_(caps, nk_kernel_sqeuclidean_k, (nk_kernel_punned_t *)&t->sqeuclidean_e2m3, &used);
242
+ nk_dispatch_e2m3_find_(caps, nk_kernel_each_scale_k, (nk_kernel_punned_t *)&t->each_scale_e2m3, &used);
243
+ nk_dispatch_e2m3_find_(caps, nk_kernel_each_sum_k, (nk_kernel_punned_t *)&t->each_sum_e2m3, &used);
244
+ nk_dispatch_e2m3_find_(caps, nk_kernel_each_blend_k, (nk_kernel_punned_t *)&t->each_blend_e2m3, &used);
245
+ nk_dispatch_e2m3_find_(caps, nk_kernel_each_fma_k, (nk_kernel_punned_t *)&t->each_fma_e2m3, &used);
246
+ nk_dispatch_e2m3_find_(caps, nk_kernel_reduce_moments_k, (nk_kernel_punned_t *)&t->reduce_moments_e2m3, &used);
247
+ nk_dispatch_e2m3_find_(caps, nk_kernel_reduce_minmax_k, (nk_kernel_punned_t *)&t->reduce_minmax_e2m3, &used);
248
+ nk_dispatch_e2m3_find_(caps, nk_kernel_dots_packed_size_k, (nk_kernel_punned_t *)&t->dots_packed_size_e2m3, &used);
249
+ nk_dispatch_e2m3_find_(caps, nk_kernel_dots_pack_k, (nk_kernel_punned_t *)&t->dots_pack_e2m3, &used);
250
+ nk_dispatch_e2m3_find_(caps, nk_kernel_dots_packed_k, (nk_kernel_punned_t *)&t->dots_packed_e2m3, &used);
251
+ nk_dispatch_e2m3_find_(caps, nk_kernel_dots_symmetric_k, (nk_kernel_punned_t *)&t->dots_symmetric_e2m3, &used);
252
+ nk_dispatch_e2m3_find_(caps, nk_kernel_angulars_packed_k, (nk_kernel_punned_t *)&t->angulars_packed_e2m3, &used);
253
+ nk_dispatch_e2m3_find_(caps, nk_kernel_angulars_symmetric_k, (nk_kernel_punned_t *)&t->angulars_symmetric_e2m3,
254
+ &used);
255
+ nk_dispatch_e2m3_find_(caps, nk_kernel_euclideans_packed_k, (nk_kernel_punned_t *)&t->euclideans_packed_e2m3,
256
+ &used);
257
+ nk_dispatch_e2m3_find_(caps, nk_kernel_euclideans_symmetric_k, (nk_kernel_punned_t *)&t->euclideans_symmetric_e2m3,
258
+ &used);
259
+ }
260
+
261
+ #ifdef __cplusplus
262
+ }
263
+ #endif
@@ -0,0 +1,243 @@
1
+ /**
2
+ * @brief Dispatch Initialization for E3M2 Data Types.
3
+ * @file c/dispatch_e3m2.c
4
+ * @author Ash Vardanian
5
+ * @date February 3, 2026
6
+ */
7
+ #include "dispatch.h"
8
+
9
+ #ifdef __cplusplus
10
+ extern "C" {
11
+ #endif
12
+
13
+ void nk_dispatch_e3m2_find_(nk_capability_t v, nk_kernel_kind_t k, nk_kernel_punned_t *m, nk_capability_t *c) {
14
+ typedef nk_kernel_punned_t m_t;
15
+ #if NK_TARGET_V128RELAXED
16
+ if (v & nk_cap_v128relaxed_k) switch (k) {
17
+ case nk_kernel_dot_k: *m = (m_t)&nk_dot_e3m2_v128relaxed, *c = nk_cap_v128relaxed_k; return;
18
+ case nk_kernel_reduce_moments_k:
19
+ *m = (m_t)&nk_reduce_moments_e3m2_v128relaxed, *c = nk_cap_v128relaxed_k;
20
+ return;
21
+ case nk_kernel_reduce_minmax_k: *m = (m_t)&nk_reduce_minmax_e3m2_v128relaxed, *c = nk_cap_v128relaxed_k; return;
22
+ case nk_kernel_dots_packed_size_k:
23
+ *m = (m_t)&nk_dots_packed_size_e3m2_v128relaxed, *c = nk_cap_v128relaxed_k;
24
+ return;
25
+ case nk_kernel_dots_pack_k: *m = (m_t)&nk_dots_pack_e3m2_v128relaxed, *c = nk_cap_v128relaxed_k; return;
26
+ case nk_kernel_dots_packed_k: *m = (m_t)&nk_dots_packed_e3m2_v128relaxed, *c = nk_cap_v128relaxed_k; return;
27
+ case nk_kernel_dots_symmetric_k:
28
+ *m = (m_t)&nk_dots_symmetric_e3m2_v128relaxed, *c = nk_cap_v128relaxed_k;
29
+ return;
30
+ case nk_kernel_angulars_packed_k:
31
+ *m = (m_t)&nk_angulars_packed_e3m2_v128relaxed, *c = nk_cap_v128relaxed_k;
32
+ return;
33
+ case nk_kernel_angulars_symmetric_k:
34
+ *m = (m_t)&nk_angulars_symmetric_e3m2_v128relaxed, *c = nk_cap_v128relaxed_k;
35
+ return;
36
+ case nk_kernel_euclideans_packed_k:
37
+ *m = (m_t)&nk_euclideans_packed_e3m2_v128relaxed, *c = nk_cap_v128relaxed_k;
38
+ return;
39
+ case nk_kernel_euclideans_symmetric_k:
40
+ *m = (m_t)&nk_euclideans_symmetric_e3m2_v128relaxed, *c = nk_cap_v128relaxed_k;
41
+ return;
42
+ default: break;
43
+ }
44
+ #endif
45
+ #if NK_TARGET_SME
46
+ if (v & nk_cap_sme_k) switch (k) {
47
+ case nk_kernel_dots_packed_size_k: *m = (m_t)&nk_dots_packed_size_e3m2_sme, *c = nk_cap_sme_k; return;
48
+ case nk_kernel_dots_pack_k: *m = (m_t)&nk_dots_pack_e3m2_sme, *c = nk_cap_sme_k; return;
49
+ case nk_kernel_dots_packed_k: *m = (m_t)&nk_dots_packed_e3m2_sme, *c = nk_cap_sme_k; return;
50
+ case nk_kernel_dots_symmetric_k: *m = (m_t)&nk_dots_symmetric_e3m2_sme, *c = nk_cap_sme_k; return;
51
+ case nk_kernel_angulars_packed_k: *m = (m_t)&nk_angulars_packed_e3m2_sme, *c = nk_cap_sme_k; return;
52
+ case nk_kernel_angulars_symmetric_k: *m = (m_t)&nk_angulars_symmetric_e3m2_sme, *c = nk_cap_sme_k; return;
53
+ case nk_kernel_euclideans_packed_k: *m = (m_t)&nk_euclideans_packed_e3m2_sme, *c = nk_cap_sme_k; return;
54
+ case nk_kernel_euclideans_symmetric_k: *m = (m_t)&nk_euclideans_symmetric_e3m2_sme, *c = nk_cap_sme_k; return;
55
+ default: break;
56
+ }
57
+ #endif
58
+ #if NK_TARGET_NEONSDOT
59
+ if (v & nk_cap_neonsdot_k) switch (k) {
60
+ case nk_kernel_dot_k: *m = (m_t)&nk_dot_e3m2_neonsdot, *c = nk_cap_neonsdot_k; return;
61
+ default: break;
62
+ }
63
+ #endif
64
+ #if NK_TARGET_NEON
65
+ if (v & nk_cap_neon_k) switch (k) {
66
+ case nk_kernel_dot_k: *m = (m_t)&nk_dot_e3m2_neon, *c = nk_cap_neon_k; return;
67
+ case nk_kernel_euclidean_k: *m = (m_t)&nk_euclidean_e3m2_neon, *c = nk_cap_neon_k; return;
68
+ case nk_kernel_sqeuclidean_k: *m = (m_t)&nk_sqeuclidean_e3m2_neon, *c = nk_cap_neon_k; return;
69
+ case nk_kernel_angular_k: *m = (m_t)&nk_angular_e3m2_neon, *c = nk_cap_neon_k; return;
70
+ case nk_kernel_reduce_moments_k: *m = (m_t)&nk_reduce_moments_e3m2_neon, *c = nk_cap_neon_k; return;
71
+ case nk_kernel_reduce_minmax_k: *m = (m_t)&nk_reduce_minmax_e3m2_neon, *c = nk_cap_neon_k; return;
72
+ default: break;
73
+ }
74
+ #endif
75
+ #if NK_TARGET_SAPPHIREAMX
76
+ if (v & nk_cap_sapphireamx_k) switch (k) {
77
+ case nk_kernel_dots_packed_size_k:
78
+ *m = (m_t)&nk_dots_packed_size_e3m2_sapphireamx, *c = nk_cap_sapphireamx_k;
79
+ return;
80
+ case nk_kernel_dots_pack_k: *m = (m_t)&nk_dots_pack_e3m2_sapphireamx, *c = nk_cap_sapphireamx_k; return;
81
+ case nk_kernel_dots_packed_k: *m = (m_t)&nk_dots_packed_e3m2_sapphireamx, *c = nk_cap_sapphireamx_k; return;
82
+ case nk_kernel_dots_symmetric_k:
83
+ *m = (m_t)&nk_dots_symmetric_e3m2_sapphireamx, *c = nk_cap_sapphireamx_k;
84
+ return;
85
+ case nk_kernel_angulars_packed_k:
86
+ *m = (m_t)&nk_angulars_packed_e3m2_sapphireamx, *c = nk_cap_sapphireamx_k;
87
+ return;
88
+ case nk_kernel_angulars_symmetric_k:
89
+ *m = (m_t)&nk_angulars_symmetric_e3m2_sapphireamx, *c = nk_cap_sapphireamx_k;
90
+ return;
91
+ case nk_kernel_euclideans_packed_k:
92
+ *m = (m_t)&nk_euclideans_packed_e3m2_sapphireamx, *c = nk_cap_sapphireamx_k;
93
+ return;
94
+ case nk_kernel_euclideans_symmetric_k:
95
+ *m = (m_t)&nk_euclideans_symmetric_e3m2_sapphireamx, *c = nk_cap_sapphireamx_k;
96
+ return;
97
+ default: break;
98
+ }
99
+ #endif
100
+ #if NK_TARGET_SAPPHIRE
101
+ if (v & nk_cap_sapphire_k) switch (k) {
102
+ case nk_kernel_euclidean_k: *m = (m_t)&nk_euclidean_e3m2_sapphire, *c = nk_cap_sapphire_k; return;
103
+ case nk_kernel_sqeuclidean_k: *m = (m_t)&nk_sqeuclidean_e3m2_sapphire, *c = nk_cap_sapphire_k; return;
104
+ case nk_kernel_angular_k: *m = (m_t)&nk_angular_e3m2_sapphire, *c = nk_cap_sapphire_k; return;
105
+ default: break;
106
+ }
107
+ #endif
108
+ #if NK_TARGET_ICELAKE
109
+ if (v & nk_cap_icelake_k) switch (k) {
110
+ case nk_kernel_dot_k: *m = (m_t)&nk_dot_e3m2_icelake, *c = nk_cap_icelake_k; return;
111
+ case nk_kernel_reduce_moments_k: *m = (m_t)&nk_reduce_moments_e3m2_icelake, *c = nk_cap_icelake_k; return;
112
+ default: break;
113
+ }
114
+ #endif
115
+ #if NK_TARGET_SKYLAKE
116
+ if (v & nk_cap_skylake_k) switch (k) {
117
+ case nk_kernel_dot_k: *m = (m_t)&nk_dot_e3m2_skylake, *c = nk_cap_skylake_k; return;
118
+ case nk_kernel_euclidean_k: *m = (m_t)&nk_euclidean_e3m2_skylake, *c = nk_cap_skylake_k; return;
119
+ case nk_kernel_sqeuclidean_k: *m = (m_t)&nk_sqeuclidean_e3m2_skylake, *c = nk_cap_skylake_k; return;
120
+ case nk_kernel_angular_k: *m = (m_t)&nk_angular_e3m2_skylake, *c = nk_cap_skylake_k; return;
121
+ case nk_kernel_dots_packed_size_k: *m = (m_t)&nk_dots_packed_size_e3m2_skylake, *c = nk_cap_skylake_k; return;
122
+ case nk_kernel_dots_pack_k: *m = (m_t)&nk_dots_pack_e3m2_skylake, *c = nk_cap_skylake_k; return;
123
+ case nk_kernel_dots_packed_k: *m = (m_t)&nk_dots_packed_e3m2_skylake, *c = nk_cap_skylake_k; return;
124
+ case nk_kernel_dots_symmetric_k: *m = (m_t)&nk_dots_symmetric_e3m2_skylake, *c = nk_cap_skylake_k; return;
125
+ case nk_kernel_angulars_packed_k: *m = (m_t)&nk_angulars_packed_e3m2_skylake, *c = nk_cap_skylake_k; return;
126
+ case nk_kernel_angulars_symmetric_k:
127
+ *m = (m_t)&nk_angulars_symmetric_e3m2_skylake, *c = nk_cap_skylake_k;
128
+ return;
129
+ case nk_kernel_euclideans_packed_k: *m = (m_t)&nk_euclideans_packed_e3m2_skylake, *c = nk_cap_skylake_k; return;
130
+ case nk_kernel_euclideans_symmetric_k:
131
+ *m = (m_t)&nk_euclideans_symmetric_e3m2_skylake, *c = nk_cap_skylake_k;
132
+ return;
133
+ case nk_kernel_reduce_moments_k: *m = (m_t)&nk_reduce_moments_e3m2_skylake, *c = nk_cap_skylake_k; return;
134
+ case nk_kernel_reduce_minmax_k: *m = (m_t)&nk_reduce_minmax_e3m2_skylake, *c = nk_cap_skylake_k; return;
135
+ default: break;
136
+ }
137
+ #endif
138
+ #if NK_TARGET_ALDER
139
+ if (v & nk_cap_alder_k) switch (k) {
140
+ case nk_kernel_angular_k: *m = (m_t)&nk_angular_e3m2_alder, *c = nk_cap_alder_k; return;
141
+ case nk_kernel_euclidean_k: *m = (m_t)&nk_euclidean_e3m2_alder, *c = nk_cap_alder_k; return;
142
+ case nk_kernel_sqeuclidean_k: *m = (m_t)&nk_sqeuclidean_e3m2_alder, *c = nk_cap_alder_k; return;
143
+ case nk_kernel_reduce_moments_k: *m = (m_t)&nk_reduce_moments_e3m2_alder, *c = nk_cap_alder_k; return;
144
+ default: break;
145
+ }
146
+ #endif
147
+ #if NK_TARGET_HASWELL
148
+ if (v & nk_cap_haswell_k) switch (k) {
149
+ case nk_kernel_dot_k: *m = (m_t)&nk_dot_e3m2_haswell, *c = nk_cap_haswell_k; return;
150
+ case nk_kernel_euclidean_k: *m = (m_t)&nk_euclidean_e3m2_haswell, *c = nk_cap_haswell_k; return;
151
+ case nk_kernel_sqeuclidean_k: *m = (m_t)&nk_sqeuclidean_e3m2_haswell, *c = nk_cap_haswell_k; return;
152
+ case nk_kernel_angular_k: *m = (m_t)&nk_angular_e3m2_haswell, *c = nk_cap_haswell_k; return;
153
+ case nk_kernel_dots_packed_size_k: *m = (m_t)&nk_dots_packed_size_e3m2_haswell, *c = nk_cap_haswell_k; return;
154
+ case nk_kernel_dots_pack_k: *m = (m_t)&nk_dots_pack_e3m2_haswell, *c = nk_cap_haswell_k; return;
155
+ case nk_kernel_dots_packed_k: *m = (m_t)&nk_dots_packed_e3m2_haswell, *c = nk_cap_haswell_k; return;
156
+ case nk_kernel_dots_symmetric_k: *m = (m_t)&nk_dots_symmetric_e3m2_haswell, *c = nk_cap_haswell_k; return;
157
+ case nk_kernel_angulars_packed_k: *m = (m_t)&nk_angulars_packed_e3m2_haswell, *c = nk_cap_haswell_k; return;
158
+ case nk_kernel_angulars_symmetric_k:
159
+ *m = (m_t)&nk_angulars_symmetric_e3m2_haswell, *c = nk_cap_haswell_k;
160
+ return;
161
+ case nk_kernel_euclideans_packed_k: *m = (m_t)&nk_euclideans_packed_e3m2_haswell, *c = nk_cap_haswell_k; return;
162
+ case nk_kernel_euclideans_symmetric_k:
163
+ *m = (m_t)&nk_euclideans_symmetric_e3m2_haswell, *c = nk_cap_haswell_k;
164
+ return;
165
+ case nk_kernel_reduce_moments_k: *m = (m_t)&nk_reduce_moments_e3m2_haswell, *c = nk_cap_haswell_k; return;
166
+ case nk_kernel_reduce_minmax_k: *m = (m_t)&nk_reduce_minmax_e3m2_haswell, *c = nk_cap_haswell_k; return;
167
+ default: break;
168
+ }
169
+ #endif
170
+ #if NK_TARGET_RVV
171
+ if (v & nk_cap_rvv_k) switch (k) {
172
+ case nk_kernel_dot_k: *m = (m_t)&nk_dot_e3m2_rvv, *c = nk_cap_rvv_k; return;
173
+ case nk_kernel_dots_packed_size_k: *m = (m_t)&nk_dots_packed_size_e3m2_rvv, *c = nk_cap_rvv_k; return;
174
+ case nk_kernel_dots_pack_k: *m = (m_t)&nk_dots_pack_e3m2_rvv, *c = nk_cap_rvv_k; return;
175
+ case nk_kernel_dots_packed_k: *m = (m_t)&nk_dots_packed_e3m2_rvv, *c = nk_cap_rvv_k; return;
176
+ case nk_kernel_dots_symmetric_k: *m = (m_t)&nk_dots_symmetric_e3m2_rvv, *c = nk_cap_rvv_k; return;
177
+ case nk_kernel_angulars_packed_k: *m = (m_t)&nk_angulars_packed_e3m2_rvv, *c = nk_cap_rvv_k; return;
178
+ case nk_kernel_angulars_symmetric_k: *m = (m_t)&nk_angulars_symmetric_e3m2_rvv, *c = nk_cap_rvv_k; return;
179
+ case nk_kernel_euclideans_packed_k: *m = (m_t)&nk_euclideans_packed_e3m2_rvv, *c = nk_cap_rvv_k; return;
180
+ case nk_kernel_euclideans_symmetric_k: *m = (m_t)&nk_euclideans_symmetric_e3m2_rvv, *c = nk_cap_rvv_k; return;
181
+ case nk_kernel_reduce_moments_k: *m = (m_t)&nk_reduce_moments_e3m2_rvv, *c = nk_cap_rvv_k; return;
182
+ case nk_kernel_reduce_minmax_k: *m = (m_t)&nk_reduce_minmax_e3m2_rvv, *c = nk_cap_rvv_k; return;
183
+ default: break;
184
+ }
185
+ #endif
186
+ if (v & nk_cap_serial_k) switch (k) {
187
+ case nk_kernel_dot_k: *m = (m_t)&nk_dot_e3m2_serial, *c = nk_cap_serial_k; return;
188
+ case nk_kernel_euclidean_k: *m = (m_t)&nk_euclidean_e3m2_serial, *c = nk_cap_serial_k; return;
189
+ case nk_kernel_sqeuclidean_k: *m = (m_t)&nk_sqeuclidean_e3m2_serial, *c = nk_cap_serial_k; return;
190
+ case nk_kernel_angular_k: *m = (m_t)&nk_angular_e3m2_serial, *c = nk_cap_serial_k; return;
191
+ case nk_kernel_each_sum_k: *m = (m_t)&nk_each_sum_e3m2_serial, *c = nk_cap_serial_k; return;
192
+ case nk_kernel_each_scale_k: *m = (m_t)&nk_each_scale_e3m2_serial, *c = nk_cap_serial_k; return;
193
+ case nk_kernel_each_blend_k: *m = (m_t)&nk_each_blend_e3m2_serial, *c = nk_cap_serial_k; return;
194
+ case nk_kernel_each_fma_k: *m = (m_t)&nk_each_fma_e3m2_serial, *c = nk_cap_serial_k; return;
195
+ case nk_kernel_dots_packed_size_k: *m = (m_t)&nk_dots_packed_size_e3m2_serial, *c = nk_cap_serial_k; return;
196
+ case nk_kernel_dots_pack_k: *m = (m_t)&nk_dots_pack_e3m2_serial, *c = nk_cap_serial_k; return;
197
+ case nk_kernel_dots_packed_k: *m = (m_t)&nk_dots_packed_e3m2_serial, *c = nk_cap_serial_k; return;
198
+ case nk_kernel_dots_symmetric_k: *m = (m_t)&nk_dots_symmetric_e3m2_serial, *c = nk_cap_serial_k; return;
199
+ case nk_kernel_angulars_packed_k: *m = (m_t)&nk_angulars_packed_e3m2_serial, *c = nk_cap_serial_k; return;
200
+ case nk_kernel_angulars_symmetric_k: *m = (m_t)&nk_angulars_symmetric_e3m2_serial, *c = nk_cap_serial_k; return;
201
+ case nk_kernel_euclideans_packed_k: *m = (m_t)&nk_euclideans_packed_e3m2_serial, *c = nk_cap_serial_k; return;
202
+ case nk_kernel_euclideans_symmetric_k:
203
+ *m = (m_t)&nk_euclideans_symmetric_e3m2_serial, *c = nk_cap_serial_k;
204
+ return;
205
+ case nk_kernel_reduce_moments_k: *m = (m_t)&nk_reduce_moments_e3m2_serial, *c = nk_cap_serial_k; return;
206
+ case nk_kernel_reduce_minmax_k: *m = (m_t)&nk_reduce_minmax_e3m2_serial, *c = nk_cap_serial_k; return;
207
+ default: break;
208
+ }
209
+
210
+ // Error fallback - zero capability signals lookup failure
211
+ *m = (m_t)nk_error_dense_, *c = 0;
212
+ }
213
+
214
+ void nk_dispatch_e3m2_init_(nk_capability_t caps) {
215
+ nk_implementations_t *t = &nk_dispatch_table;
216
+ nk_capability_t used;
217
+
218
+ nk_dispatch_e3m2_find_(caps, nk_kernel_dot_k, (nk_kernel_punned_t *)&t->dot_e3m2, &used);
219
+ nk_dispatch_e3m2_find_(caps, nk_kernel_angular_k, (nk_kernel_punned_t *)&t->angular_e3m2, &used);
220
+ nk_dispatch_e3m2_find_(caps, nk_kernel_euclidean_k, (nk_kernel_punned_t *)&t->euclidean_e3m2, &used);
221
+ nk_dispatch_e3m2_find_(caps, nk_kernel_sqeuclidean_k, (nk_kernel_punned_t *)&t->sqeuclidean_e3m2, &used);
222
+ nk_dispatch_e3m2_find_(caps, nk_kernel_each_scale_k, (nk_kernel_punned_t *)&t->each_scale_e3m2, &used);
223
+ nk_dispatch_e3m2_find_(caps, nk_kernel_each_sum_k, (nk_kernel_punned_t *)&t->each_sum_e3m2, &used);
224
+ nk_dispatch_e3m2_find_(caps, nk_kernel_each_blend_k, (nk_kernel_punned_t *)&t->each_blend_e3m2, &used);
225
+ nk_dispatch_e3m2_find_(caps, nk_kernel_each_fma_k, (nk_kernel_punned_t *)&t->each_fma_e3m2, &used);
226
+ nk_dispatch_e3m2_find_(caps, nk_kernel_reduce_moments_k, (nk_kernel_punned_t *)&t->reduce_moments_e3m2, &used);
227
+ nk_dispatch_e3m2_find_(caps, nk_kernel_reduce_minmax_k, (nk_kernel_punned_t *)&t->reduce_minmax_e3m2, &used);
228
+ nk_dispatch_e3m2_find_(caps, nk_kernel_dots_packed_size_k, (nk_kernel_punned_t *)&t->dots_packed_size_e3m2, &used);
229
+ nk_dispatch_e3m2_find_(caps, nk_kernel_dots_pack_k, (nk_kernel_punned_t *)&t->dots_pack_e3m2, &used);
230
+ nk_dispatch_e3m2_find_(caps, nk_kernel_dots_packed_k, (nk_kernel_punned_t *)&t->dots_packed_e3m2, &used);
231
+ nk_dispatch_e3m2_find_(caps, nk_kernel_dots_symmetric_k, (nk_kernel_punned_t *)&t->dots_symmetric_e3m2, &used);
232
+ nk_dispatch_e3m2_find_(caps, nk_kernel_angulars_packed_k, (nk_kernel_punned_t *)&t->angulars_packed_e3m2, &used);
233
+ nk_dispatch_e3m2_find_(caps, nk_kernel_angulars_symmetric_k, (nk_kernel_punned_t *)&t->angulars_symmetric_e3m2,
234
+ &used);
235
+ nk_dispatch_e3m2_find_(caps, nk_kernel_euclideans_packed_k, (nk_kernel_punned_t *)&t->euclideans_packed_e3m2,
236
+ &used);
237
+ nk_dispatch_e3m2_find_(caps, nk_kernel_euclideans_symmetric_k, (nk_kernel_punned_t *)&t->euclideans_symmetric_e3m2,
238
+ &used);
239
+ }
240
+
241
+ #ifdef __cplusplus
242
+ }
243
+ #endif