numkong 7.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (294) hide show
  1. package/LICENSE +201 -0
  2. package/README.md +495 -0
  3. package/binding.gyp +540 -0
  4. package/c/dispatch.h +512 -0
  5. package/c/dispatch_bf16.c +389 -0
  6. package/c/dispatch_bf16c.c +52 -0
  7. package/c/dispatch_e2m3.c +263 -0
  8. package/c/dispatch_e3m2.c +243 -0
  9. package/c/dispatch_e4m3.c +276 -0
  10. package/c/dispatch_e5m2.c +272 -0
  11. package/c/dispatch_f16.c +376 -0
  12. package/c/dispatch_f16c.c +58 -0
  13. package/c/dispatch_f32.c +378 -0
  14. package/c/dispatch_f32c.c +99 -0
  15. package/c/dispatch_f64.c +296 -0
  16. package/c/dispatch_f64c.c +98 -0
  17. package/c/dispatch_i16.c +96 -0
  18. package/c/dispatch_i32.c +89 -0
  19. package/c/dispatch_i4.c +150 -0
  20. package/c/dispatch_i64.c +86 -0
  21. package/c/dispatch_i8.c +289 -0
  22. package/c/dispatch_other.c +330 -0
  23. package/c/dispatch_u1.c +148 -0
  24. package/c/dispatch_u16.c +124 -0
  25. package/c/dispatch_u32.c +118 -0
  26. package/c/dispatch_u4.c +150 -0
  27. package/c/dispatch_u64.c +102 -0
  28. package/c/dispatch_u8.c +303 -0
  29. package/c/numkong.c +950 -0
  30. package/include/README.md +573 -0
  31. package/include/module.modulemap +129 -0
  32. package/include/numkong/attention/sapphireamx.h +1361 -0
  33. package/include/numkong/attention/sme.h +2066 -0
  34. package/include/numkong/attention.h +49 -0
  35. package/include/numkong/capabilities.h +748 -0
  36. package/include/numkong/cast/README.md +262 -0
  37. package/include/numkong/cast/haswell.h +975 -0
  38. package/include/numkong/cast/icelake.h +470 -0
  39. package/include/numkong/cast/neon.h +1192 -0
  40. package/include/numkong/cast/rvv.h +1021 -0
  41. package/include/numkong/cast/sapphire.h +262 -0
  42. package/include/numkong/cast/serial.h +2262 -0
  43. package/include/numkong/cast/skylake.h +856 -0
  44. package/include/numkong/cast/v128relaxed.h +180 -0
  45. package/include/numkong/cast.h +230 -0
  46. package/include/numkong/curved/README.md +223 -0
  47. package/include/numkong/curved/genoa.h +182 -0
  48. package/include/numkong/curved/haswell.h +276 -0
  49. package/include/numkong/curved/neon.h +205 -0
  50. package/include/numkong/curved/neonbfdot.h +212 -0
  51. package/include/numkong/curved/neonhalf.h +212 -0
  52. package/include/numkong/curved/rvv.h +305 -0
  53. package/include/numkong/curved/serial.h +207 -0
  54. package/include/numkong/curved/skylake.h +457 -0
  55. package/include/numkong/curved/smef64.h +506 -0
  56. package/include/numkong/curved.h +517 -0
  57. package/include/numkong/curved.hpp +144 -0
  58. package/include/numkong/dot/README.md +425 -0
  59. package/include/numkong/dot/alder.h +563 -0
  60. package/include/numkong/dot/genoa.h +315 -0
  61. package/include/numkong/dot/haswell.h +1688 -0
  62. package/include/numkong/dot/icelake.h +883 -0
  63. package/include/numkong/dot/neon.h +818 -0
  64. package/include/numkong/dot/neonbfdot.h +244 -0
  65. package/include/numkong/dot/neonfhm.h +360 -0
  66. package/include/numkong/dot/neonhalf.h +198 -0
  67. package/include/numkong/dot/neonsdot.h +508 -0
  68. package/include/numkong/dot/rvv.h +714 -0
  69. package/include/numkong/dot/rvvbb.h +72 -0
  70. package/include/numkong/dot/rvvbf16.h +123 -0
  71. package/include/numkong/dot/rvvhalf.h +129 -0
  72. package/include/numkong/dot/sapphire.h +141 -0
  73. package/include/numkong/dot/serial.h +838 -0
  74. package/include/numkong/dot/sierra.h +405 -0
  75. package/include/numkong/dot/skylake.h +1084 -0
  76. package/include/numkong/dot/sve.h +379 -0
  77. package/include/numkong/dot/svebfdot.h +74 -0
  78. package/include/numkong/dot/svehalf.h +123 -0
  79. package/include/numkong/dot/v128relaxed.h +1258 -0
  80. package/include/numkong/dot.h +1070 -0
  81. package/include/numkong/dot.hpp +94 -0
  82. package/include/numkong/dots/README.md +496 -0
  83. package/include/numkong/dots/alder.h +114 -0
  84. package/include/numkong/dots/genoa.h +94 -0
  85. package/include/numkong/dots/haswell.h +295 -0
  86. package/include/numkong/dots/icelake.h +171 -0
  87. package/include/numkong/dots/neon.h +120 -0
  88. package/include/numkong/dots/neonbfdot.h +58 -0
  89. package/include/numkong/dots/neonfhm.h +94 -0
  90. package/include/numkong/dots/neonhalf.h +57 -0
  91. package/include/numkong/dots/neonsdot.h +108 -0
  92. package/include/numkong/dots/rvv.h +2486 -0
  93. package/include/numkong/dots/sapphireamx.h +3973 -0
  94. package/include/numkong/dots/serial.h +2844 -0
  95. package/include/numkong/dots/sierra.h +97 -0
  96. package/include/numkong/dots/skylake.h +196 -0
  97. package/include/numkong/dots/sme.h +5372 -0
  98. package/include/numkong/dots/smebi32.h +461 -0
  99. package/include/numkong/dots/smef64.h +1318 -0
  100. package/include/numkong/dots/smehalf.h +47 -0
  101. package/include/numkong/dots/v128relaxed.h +294 -0
  102. package/include/numkong/dots.h +2804 -0
  103. package/include/numkong/dots.hpp +639 -0
  104. package/include/numkong/each/README.md +469 -0
  105. package/include/numkong/each/haswell.h +1658 -0
  106. package/include/numkong/each/icelake.h +272 -0
  107. package/include/numkong/each/neon.h +1104 -0
  108. package/include/numkong/each/neonbfdot.h +212 -0
  109. package/include/numkong/each/neonhalf.h +410 -0
  110. package/include/numkong/each/rvv.h +1121 -0
  111. package/include/numkong/each/sapphire.h +477 -0
  112. package/include/numkong/each/serial.h +260 -0
  113. package/include/numkong/each/skylake.h +1562 -0
  114. package/include/numkong/each.h +2146 -0
  115. package/include/numkong/each.hpp +434 -0
  116. package/include/numkong/geospatial/README.md +147 -0
  117. package/include/numkong/geospatial/haswell.h +593 -0
  118. package/include/numkong/geospatial/neon.h +571 -0
  119. package/include/numkong/geospatial/rvv.h +701 -0
  120. package/include/numkong/geospatial/serial.h +309 -0
  121. package/include/numkong/geospatial/skylake.h +577 -0
  122. package/include/numkong/geospatial/v128relaxed.h +613 -0
  123. package/include/numkong/geospatial.h +453 -0
  124. package/include/numkong/geospatial.hpp +235 -0
  125. package/include/numkong/matrix.hpp +336 -0
  126. package/include/numkong/maxsim/README.md +187 -0
  127. package/include/numkong/maxsim/alder.h +511 -0
  128. package/include/numkong/maxsim/genoa.h +115 -0
  129. package/include/numkong/maxsim/haswell.h +553 -0
  130. package/include/numkong/maxsim/icelake.h +480 -0
  131. package/include/numkong/maxsim/neonsdot.h +394 -0
  132. package/include/numkong/maxsim/sapphireamx.h +877 -0
  133. package/include/numkong/maxsim/serial.h +490 -0
  134. package/include/numkong/maxsim/sme.h +929 -0
  135. package/include/numkong/maxsim/v128relaxed.h +280 -0
  136. package/include/numkong/maxsim.h +571 -0
  137. package/include/numkong/maxsim.hpp +133 -0
  138. package/include/numkong/mesh/README.md +227 -0
  139. package/include/numkong/mesh/haswell.h +2235 -0
  140. package/include/numkong/mesh/neon.h +1329 -0
  141. package/include/numkong/mesh/neonbfdot.h +842 -0
  142. package/include/numkong/mesh/neonhalf.h +616 -0
  143. package/include/numkong/mesh/rvv.h +916 -0
  144. package/include/numkong/mesh/serial.h +742 -0
  145. package/include/numkong/mesh/skylake.h +1135 -0
  146. package/include/numkong/mesh/v128relaxed.h +1052 -0
  147. package/include/numkong/mesh.h +652 -0
  148. package/include/numkong/mesh.hpp +762 -0
  149. package/include/numkong/numkong.h +78 -0
  150. package/include/numkong/numkong.hpp +57 -0
  151. package/include/numkong/probability/README.md +173 -0
  152. package/include/numkong/probability/haswell.h +267 -0
  153. package/include/numkong/probability/neon.h +225 -0
  154. package/include/numkong/probability/rvv.h +409 -0
  155. package/include/numkong/probability/serial.h +169 -0
  156. package/include/numkong/probability/skylake.h +324 -0
  157. package/include/numkong/probability.h +383 -0
  158. package/include/numkong/probability.hpp +120 -0
  159. package/include/numkong/random.h +50 -0
  160. package/include/numkong/random.hpp +285 -0
  161. package/include/numkong/reduce/README.md +547 -0
  162. package/include/numkong/reduce/alder.h +632 -0
  163. package/include/numkong/reduce/genoa.h +201 -0
  164. package/include/numkong/reduce/haswell.h +3783 -0
  165. package/include/numkong/reduce/icelake.h +549 -0
  166. package/include/numkong/reduce/neon.h +3841 -0
  167. package/include/numkong/reduce/neonbfdot.h +353 -0
  168. package/include/numkong/reduce/neonfhm.h +665 -0
  169. package/include/numkong/reduce/neonhalf.h +157 -0
  170. package/include/numkong/reduce/neonsdot.h +357 -0
  171. package/include/numkong/reduce/rvv.h +3407 -0
  172. package/include/numkong/reduce/serial.h +757 -0
  173. package/include/numkong/reduce/sierra.h +338 -0
  174. package/include/numkong/reduce/skylake.h +3792 -0
  175. package/include/numkong/reduce/v128relaxed.h +2302 -0
  176. package/include/numkong/reduce.h +1597 -0
  177. package/include/numkong/reduce.hpp +633 -0
  178. package/include/numkong/scalar/README.md +89 -0
  179. package/include/numkong/scalar/haswell.h +113 -0
  180. package/include/numkong/scalar/neon.h +122 -0
  181. package/include/numkong/scalar/neonhalf.h +70 -0
  182. package/include/numkong/scalar/rvv.h +211 -0
  183. package/include/numkong/scalar/sapphire.h +63 -0
  184. package/include/numkong/scalar/serial.h +332 -0
  185. package/include/numkong/scalar/v128relaxed.h +56 -0
  186. package/include/numkong/scalar.h +683 -0
  187. package/include/numkong/set/README.md +179 -0
  188. package/include/numkong/set/haswell.h +334 -0
  189. package/include/numkong/set/icelake.h +485 -0
  190. package/include/numkong/set/neon.h +364 -0
  191. package/include/numkong/set/rvv.h +226 -0
  192. package/include/numkong/set/rvvbb.h +117 -0
  193. package/include/numkong/set/serial.h +174 -0
  194. package/include/numkong/set/sve.h +185 -0
  195. package/include/numkong/set/v128relaxed.h +240 -0
  196. package/include/numkong/set.h +457 -0
  197. package/include/numkong/set.hpp +114 -0
  198. package/include/numkong/sets/README.md +149 -0
  199. package/include/numkong/sets/haswell.h +63 -0
  200. package/include/numkong/sets/icelake.h +66 -0
  201. package/include/numkong/sets/neon.h +61 -0
  202. package/include/numkong/sets/serial.h +43 -0
  203. package/include/numkong/sets/smebi32.h +1099 -0
  204. package/include/numkong/sets/v128relaxed.h +58 -0
  205. package/include/numkong/sets.h +339 -0
  206. package/include/numkong/sparse/README.md +156 -0
  207. package/include/numkong/sparse/icelake.h +463 -0
  208. package/include/numkong/sparse/neon.h +288 -0
  209. package/include/numkong/sparse/serial.h +117 -0
  210. package/include/numkong/sparse/sve2.h +507 -0
  211. package/include/numkong/sparse/turin.h +322 -0
  212. package/include/numkong/sparse.h +363 -0
  213. package/include/numkong/sparse.hpp +113 -0
  214. package/include/numkong/spatial/README.md +435 -0
  215. package/include/numkong/spatial/alder.h +607 -0
  216. package/include/numkong/spatial/genoa.h +290 -0
  217. package/include/numkong/spatial/haswell.h +960 -0
  218. package/include/numkong/spatial/icelake.h +586 -0
  219. package/include/numkong/spatial/neon.h +773 -0
  220. package/include/numkong/spatial/neonbfdot.h +165 -0
  221. package/include/numkong/spatial/neonhalf.h +118 -0
  222. package/include/numkong/spatial/neonsdot.h +261 -0
  223. package/include/numkong/spatial/rvv.h +984 -0
  224. package/include/numkong/spatial/rvvbf16.h +123 -0
  225. package/include/numkong/spatial/rvvhalf.h +117 -0
  226. package/include/numkong/spatial/sapphire.h +343 -0
  227. package/include/numkong/spatial/serial.h +346 -0
  228. package/include/numkong/spatial/sierra.h +323 -0
  229. package/include/numkong/spatial/skylake.h +606 -0
  230. package/include/numkong/spatial/sve.h +224 -0
  231. package/include/numkong/spatial/svebfdot.h +122 -0
  232. package/include/numkong/spatial/svehalf.h +109 -0
  233. package/include/numkong/spatial/v128relaxed.h +717 -0
  234. package/include/numkong/spatial.h +1425 -0
  235. package/include/numkong/spatial.hpp +183 -0
  236. package/include/numkong/spatials/README.md +580 -0
  237. package/include/numkong/spatials/alder.h +94 -0
  238. package/include/numkong/spatials/genoa.h +94 -0
  239. package/include/numkong/spatials/haswell.h +219 -0
  240. package/include/numkong/spatials/icelake.h +113 -0
  241. package/include/numkong/spatials/neon.h +109 -0
  242. package/include/numkong/spatials/neonbfdot.h +60 -0
  243. package/include/numkong/spatials/neonfhm.h +92 -0
  244. package/include/numkong/spatials/neonhalf.h +58 -0
  245. package/include/numkong/spatials/neonsdot.h +109 -0
  246. package/include/numkong/spatials/rvv.h +1960 -0
  247. package/include/numkong/spatials/sapphireamx.h +1149 -0
  248. package/include/numkong/spatials/serial.h +226 -0
  249. package/include/numkong/spatials/sierra.h +96 -0
  250. package/include/numkong/spatials/skylake.h +184 -0
  251. package/include/numkong/spatials/sme.h +1901 -0
  252. package/include/numkong/spatials/smef64.h +465 -0
  253. package/include/numkong/spatials/v128relaxed.h +240 -0
  254. package/include/numkong/spatials.h +3021 -0
  255. package/include/numkong/spatials.hpp +508 -0
  256. package/include/numkong/tensor.hpp +1592 -0
  257. package/include/numkong/trigonometry/README.md +184 -0
  258. package/include/numkong/trigonometry/haswell.h +652 -0
  259. package/include/numkong/trigonometry/neon.h +639 -0
  260. package/include/numkong/trigonometry/rvv.h +699 -0
  261. package/include/numkong/trigonometry/serial.h +703 -0
  262. package/include/numkong/trigonometry/skylake.h +721 -0
  263. package/include/numkong/trigonometry/v128relaxed.h +666 -0
  264. package/include/numkong/trigonometry.h +467 -0
  265. package/include/numkong/trigonometry.hpp +166 -0
  266. package/include/numkong/types.h +1384 -0
  267. package/include/numkong/types.hpp +5603 -0
  268. package/include/numkong/vector.hpp +698 -0
  269. package/javascript/README.md +246 -0
  270. package/javascript/dist/cjs/numkong-wasm.d.ts +166 -0
  271. package/javascript/dist/cjs/numkong-wasm.js +617 -0
  272. package/javascript/dist/cjs/numkong.d.ts +343 -0
  273. package/javascript/dist/cjs/numkong.js +523 -0
  274. package/javascript/dist/cjs/package.json +3 -0
  275. package/javascript/dist/cjs/types.d.ts +284 -0
  276. package/javascript/dist/cjs/types.js +653 -0
  277. package/javascript/dist/esm/numkong-wasm.d.ts +166 -0
  278. package/javascript/dist/esm/numkong-wasm.js +595 -0
  279. package/javascript/dist/esm/numkong.d.ts +343 -0
  280. package/javascript/dist/esm/numkong.js +452 -0
  281. package/javascript/dist/esm/package.json +3 -0
  282. package/javascript/dist/esm/types.d.ts +284 -0
  283. package/javascript/dist/esm/types.js +630 -0
  284. package/javascript/dist-package-cjs.json +3 -0
  285. package/javascript/dist-package-esm.json +3 -0
  286. package/javascript/node-gyp-build.d.ts +1 -0
  287. package/javascript/numkong-wasm.ts +756 -0
  288. package/javascript/numkong.c +689 -0
  289. package/javascript/numkong.ts +575 -0
  290. package/javascript/tsconfig-base.json +39 -0
  291. package/javascript/tsconfig-cjs.json +8 -0
  292. package/javascript/tsconfig-esm.json +8 -0
  293. package/javascript/types.ts +674 -0
  294. package/package.json +87 -0
@@ -0,0 +1,150 @@
1
+ /**
2
+ * @brief Dispatch Initialization for I4 Data Types.
3
+ * @file c/dispatch_i4.c
4
+ * @author Ash Vardanian
5
+ * @date February 3, 2026
6
+ */
7
+ #include "dispatch.h"
8
+
9
+ #ifdef __cplusplus
10
+ extern "C" {
11
+ #endif
12
+
13
+ void nk_dispatch_i4_find_(nk_capability_t v, nk_kernel_kind_t k, nk_kernel_punned_t *m, nk_capability_t *c) {
14
+ typedef nk_kernel_punned_t m_t;
15
+ #if NK_TARGET_V128RELAXED
16
+ if (v & nk_cap_v128relaxed_k) switch (k) {
17
+ case nk_kernel_dot_k: *m = (m_t)&nk_dot_i4_v128relaxed, *c = nk_cap_v128relaxed_k; return;
18
+ case nk_kernel_dots_packed_size_k:
19
+ *m = (m_t)&nk_dots_packed_size_i4_v128relaxed, *c = nk_cap_v128relaxed_k;
20
+ return;
21
+ case nk_kernel_dots_pack_k: *m = (m_t)&nk_dots_pack_i4_v128relaxed, *c = nk_cap_v128relaxed_k; return;
22
+ case nk_kernel_dots_packed_k: *m = (m_t)&nk_dots_packed_i4_v128relaxed, *c = nk_cap_v128relaxed_k; return;
23
+ case nk_kernel_dots_symmetric_k: *m = (m_t)&nk_dots_symmetric_i4_v128relaxed, *c = nk_cap_v128relaxed_k; return;
24
+ default: break;
25
+ }
26
+ #endif
27
+ #if NK_TARGET_SME
28
+ if (v & nk_cap_sme_k) switch (k) {
29
+ case nk_kernel_dots_packed_size_k: *m = (m_t)&nk_dots_packed_size_i4_sme, *c = nk_cap_sme_k; return;
30
+ case nk_kernel_dots_pack_k: *m = (m_t)&nk_dots_pack_i4_sme, *c = nk_cap_sme_k; return;
31
+ case nk_kernel_dots_packed_k: *m = (m_t)&nk_dots_packed_i4_sme, *c = nk_cap_sme_k; return;
32
+ case nk_kernel_dots_symmetric_k: *m = (m_t)&nk_dots_symmetric_i4_sme, *c = nk_cap_sme_k; return;
33
+ case nk_kernel_angulars_packed_k: *m = (m_t)&nk_angulars_packed_i4_sme, *c = nk_cap_sme_k; return;
34
+ case nk_kernel_angulars_symmetric_k: *m = (m_t)&nk_angulars_symmetric_i4_sme, *c = nk_cap_sme_k; return;
35
+ case nk_kernel_euclideans_packed_k: *m = (m_t)&nk_euclideans_packed_i4_sme, *c = nk_cap_sme_k; return;
36
+ case nk_kernel_euclideans_symmetric_k: *m = (m_t)&nk_euclideans_symmetric_i4_sme, *c = nk_cap_sme_k; return;
37
+ default: break;
38
+ }
39
+ #endif
40
+ #if NK_TARGET_NEONSDOT
41
+ if (v & nk_cap_neonsdot_k) switch (k) {
42
+ case nk_kernel_dot_k: *m = (m_t)&nk_dot_i4_neonsdot, *c = nk_cap_neonsdot_k; return;
43
+ case nk_kernel_dots_packed_size_k: *m = (m_t)&nk_dots_packed_size_i4_neonsdot, *c = nk_cap_neonsdot_k; return;
44
+ case nk_kernel_dots_pack_k: *m = (m_t)&nk_dots_pack_i4_neonsdot, *c = nk_cap_neonsdot_k; return;
45
+ case nk_kernel_dots_packed_k: *m = (m_t)&nk_dots_packed_i4_neonsdot, *c = nk_cap_neonsdot_k; return;
46
+ case nk_kernel_dots_symmetric_k: *m = (m_t)&nk_dots_symmetric_i4_neonsdot, *c = nk_cap_neonsdot_k; return;
47
+ case nk_kernel_angulars_packed_k: *m = (m_t)&nk_angulars_packed_i4_neonsdot, *c = nk_cap_neonsdot_k; return;
48
+ case nk_kernel_angulars_symmetric_k:
49
+ *m = (m_t)&nk_angulars_symmetric_i4_neonsdot, *c = nk_cap_neonsdot_k;
50
+ return;
51
+ case nk_kernel_euclideans_packed_k: *m = (m_t)&nk_euclideans_packed_i4_neonsdot, *c = nk_cap_neonsdot_k; return;
52
+ case nk_kernel_euclideans_symmetric_k:
53
+ *m = (m_t)&nk_euclideans_symmetric_i4_neonsdot, *c = nk_cap_neonsdot_k;
54
+ return;
55
+ default: break;
56
+ }
57
+ #endif
58
+ #if NK_TARGET_ICELAKE
59
+ if (v & nk_cap_icelake_k) switch (k) {
60
+ case nk_kernel_dot_k: *m = (m_t)&nk_dot_i4_icelake, *c = nk_cap_icelake_k; return;
61
+ case nk_kernel_angular_k: *m = (m_t)&nk_angular_i4_icelake, *c = nk_cap_icelake_k; return;
62
+ case nk_kernel_sqeuclidean_k: *m = (m_t)&nk_sqeuclidean_i4_icelake, *c = nk_cap_icelake_k; return;
63
+ case nk_kernel_euclidean_k: *m = (m_t)&nk_euclidean_i4_icelake, *c = nk_cap_icelake_k; return;
64
+ case nk_kernel_dots_packed_size_k: *m = (m_t)&nk_dots_packed_size_i4_icelake, *c = nk_cap_icelake_k; return;
65
+ case nk_kernel_dots_pack_k: *m = (m_t)&nk_dots_pack_i4_icelake, *c = nk_cap_icelake_k; return;
66
+ case nk_kernel_dots_packed_k: *m = (m_t)&nk_dots_packed_i4_icelake, *c = nk_cap_icelake_k; return;
67
+ case nk_kernel_dots_symmetric_k: *m = (m_t)&nk_dots_symmetric_i4_icelake, *c = nk_cap_icelake_k; return;
68
+ case nk_kernel_angulars_packed_k: *m = (m_t)&nk_angulars_packed_i4_icelake, *c = nk_cap_icelake_k; return;
69
+ case nk_kernel_angulars_symmetric_k: *m = (m_t)&nk_angulars_symmetric_i4_icelake, *c = nk_cap_icelake_k; return;
70
+ case nk_kernel_euclideans_packed_k: *m = (m_t)&nk_euclideans_packed_i4_icelake, *c = nk_cap_icelake_k; return;
71
+ case nk_kernel_euclideans_symmetric_k:
72
+ *m = (m_t)&nk_euclideans_symmetric_i4_icelake, *c = nk_cap_icelake_k;
73
+ return;
74
+ default: break;
75
+ }
76
+ #endif
77
+ #if NK_TARGET_SKYLAKE
78
+ if (v & nk_cap_skylake_k) switch (k) {
79
+ case nk_kernel_reduce_moments_k: *m = (m_t)&nk_reduce_moments_i4_skylake, *c = nk_cap_skylake_k; return;
80
+ default: break;
81
+ }
82
+ #endif
83
+ #if NK_TARGET_HASWELL
84
+ if (v & nk_cap_haswell_k) switch (k) {
85
+ case nk_kernel_dot_k: *m = (m_t)&nk_dot_i4_haswell, *c = nk_cap_haswell_k; return;
86
+ case nk_kernel_dots_packed_size_k: *m = (m_t)&nk_dots_packed_size_i4_haswell, *c = nk_cap_haswell_k; return;
87
+ case nk_kernel_dots_pack_k: *m = (m_t)&nk_dots_pack_i4_haswell, *c = nk_cap_haswell_k; return;
88
+ case nk_kernel_dots_packed_k: *m = (m_t)&nk_dots_packed_i4_haswell, *c = nk_cap_haswell_k; return;
89
+ case nk_kernel_dots_symmetric_k: *m = (m_t)&nk_dots_symmetric_i4_haswell, *c = nk_cap_haswell_k; return;
90
+ case nk_kernel_reduce_moments_k: *m = (m_t)&nk_reduce_moments_i4_haswell, *c = nk_cap_haswell_k; return;
91
+ default: break;
92
+ }
93
+ #endif
94
+ #if NK_TARGET_RVV
95
+ if (v & nk_cap_rvv_k) switch (k) {
96
+ case nk_kernel_dot_k: *m = (m_t)&nk_dot_i4_rvv, *c = nk_cap_rvv_k; return;
97
+ case nk_kernel_sqeuclidean_k: *m = (m_t)&nk_sqeuclidean_i4_rvv, *c = nk_cap_rvv_k; return;
98
+ case nk_kernel_euclidean_k: *m = (m_t)&nk_euclidean_i4_rvv, *c = nk_cap_rvv_k; return;
99
+ case nk_kernel_angular_k: *m = (m_t)&nk_angular_i4_rvv, *c = nk_cap_rvv_k; return;
100
+ default: break;
101
+ }
102
+ #endif
103
+ if (v & nk_cap_serial_k) switch (k) {
104
+ case nk_kernel_dot_k: *m = (m_t)&nk_dot_i4_serial, *c = nk_cap_serial_k; return;
105
+ case nk_kernel_angular_k: *m = (m_t)&nk_angular_i4_serial, *c = nk_cap_serial_k; return;
106
+ case nk_kernel_sqeuclidean_k: *m = (m_t)&nk_sqeuclidean_i4_serial, *c = nk_cap_serial_k; return;
107
+ case nk_kernel_euclidean_k: *m = (m_t)&nk_euclidean_i4_serial, *c = nk_cap_serial_k; return;
108
+ case nk_kernel_dots_packed_size_k: *m = (m_t)&nk_dots_packed_size_i4_serial, *c = nk_cap_serial_k; return;
109
+ case nk_kernel_dots_pack_k: *m = (m_t)&nk_dots_pack_i4_serial, *c = nk_cap_serial_k; return;
110
+ case nk_kernel_dots_packed_k: *m = (m_t)&nk_dots_packed_i4_serial, *c = nk_cap_serial_k; return;
111
+ case nk_kernel_dots_symmetric_k: *m = (m_t)&nk_dots_symmetric_i4_serial, *c = nk_cap_serial_k; return;
112
+ case nk_kernel_angulars_packed_k: *m = (m_t)&nk_angulars_packed_i4_serial, *c = nk_cap_serial_k; return;
113
+ case nk_kernel_angulars_symmetric_k: *m = (m_t)&nk_angulars_symmetric_i4_serial, *c = nk_cap_serial_k; return;
114
+ case nk_kernel_euclideans_packed_k: *m = (m_t)&nk_euclideans_packed_i4_serial, *c = nk_cap_serial_k; return;
115
+ case nk_kernel_euclideans_symmetric_k:
116
+ *m = (m_t)&nk_euclideans_symmetric_i4_serial, *c = nk_cap_serial_k;
117
+ return;
118
+ case nk_kernel_reduce_moments_k: *m = (m_t)&nk_reduce_moments_i4_serial, *c = nk_cap_serial_k; return;
119
+ case nk_kernel_reduce_minmax_k: *m = (m_t)&nk_reduce_minmax_i4_serial, *c = nk_cap_serial_k; return;
120
+ default: break;
121
+ }
122
+
123
+ // Error fallback - zero capability signals lookup failure
124
+ *m = (m_t)nk_error_dense_, *c = 0;
125
+ }
126
+
127
+ void nk_dispatch_i4_init_(nk_capability_t caps) {
128
+ nk_implementations_t *t = &nk_dispatch_table;
129
+ nk_capability_t used;
130
+
131
+ nk_dispatch_i4_find_(caps, nk_kernel_dot_k, (nk_kernel_punned_t *)&t->dot_i4, &used);
132
+ nk_dispatch_i4_find_(caps, nk_kernel_angular_k, (nk_kernel_punned_t *)&t->angular_i4, &used);
133
+ nk_dispatch_i4_find_(caps, nk_kernel_euclidean_k, (nk_kernel_punned_t *)&t->euclidean_i4, &used);
134
+ nk_dispatch_i4_find_(caps, nk_kernel_sqeuclidean_k, (nk_kernel_punned_t *)&t->sqeuclidean_i4, &used);
135
+ nk_dispatch_i4_find_(caps, nk_kernel_reduce_moments_k, (nk_kernel_punned_t *)&t->reduce_moments_i4, &used);
136
+ nk_dispatch_i4_find_(caps, nk_kernel_reduce_minmax_k, (nk_kernel_punned_t *)&t->reduce_minmax_i4, &used);
137
+ nk_dispatch_i4_find_(caps, nk_kernel_dots_packed_size_k, (nk_kernel_punned_t *)&t->dots_packed_size_i4, &used);
138
+ nk_dispatch_i4_find_(caps, nk_kernel_dots_pack_k, (nk_kernel_punned_t *)&t->dots_pack_i4, &used);
139
+ nk_dispatch_i4_find_(caps, nk_kernel_dots_packed_k, (nk_kernel_punned_t *)&t->dots_packed_i4, &used);
140
+ nk_dispatch_i4_find_(caps, nk_kernel_dots_symmetric_k, (nk_kernel_punned_t *)&t->dots_symmetric_i4, &used);
141
+ nk_dispatch_i4_find_(caps, nk_kernel_angulars_packed_k, (nk_kernel_punned_t *)&t->angulars_packed_i4, &used);
142
+ nk_dispatch_i4_find_(caps, nk_kernel_angulars_symmetric_k, (nk_kernel_punned_t *)&t->angulars_symmetric_i4, &used);
143
+ nk_dispatch_i4_find_(caps, nk_kernel_euclideans_packed_k, (nk_kernel_punned_t *)&t->euclideans_packed_i4, &used);
144
+ nk_dispatch_i4_find_(caps, nk_kernel_euclideans_symmetric_k, (nk_kernel_punned_t *)&t->euclideans_symmetric_i4,
145
+ &used);
146
+ }
147
+
148
+ #ifdef __cplusplus
149
+ }
150
+ #endif
@@ -0,0 +1,86 @@
1
+ /**
2
+ * @brief Dispatch Initialization for I64 Data Types.
3
+ * @file c/dispatch_i64.c
4
+ * @author Ash Vardanian
5
+ * @date February 3, 2026
6
+ */
7
+ #include "dispatch.h"
8
+
9
+ void nk_dispatch_i64_find_(nk_capability_t v, nk_kernel_kind_t k, nk_kernel_punned_t *m, nk_capability_t *c) {
10
+ typedef nk_kernel_punned_t m_t;
11
+ #if NK_TARGET_V128RELAXED
12
+ if (v & nk_cap_v128relaxed_k) switch (k) {
13
+ case nk_kernel_reduce_moments_k:
14
+ *m = (m_t)&nk_reduce_moments_i64_v128relaxed, *c = nk_cap_v128relaxed_k;
15
+ return;
16
+ case nk_kernel_reduce_minmax_k: *m = (m_t)&nk_reduce_minmax_i64_v128relaxed, *c = nk_cap_v128relaxed_k; return;
17
+ default: break;
18
+ }
19
+ #endif
20
+ #if NK_TARGET_NEON
21
+ if (v & nk_cap_neon_k) switch (k) {
22
+ case nk_kernel_each_fma_k: *m = (m_t)&nk_each_fma_i64_neon, *c = nk_cap_neon_k; return;
23
+ case nk_kernel_each_scale_k: *m = (m_t)&nk_each_scale_i64_neon, *c = nk_cap_neon_k; return;
24
+ case nk_kernel_each_sum_k: *m = (m_t)&nk_each_sum_i64_neon, *c = nk_cap_neon_k; return;
25
+ case nk_kernel_reduce_moments_k: *m = (m_t)&nk_reduce_moments_i64_neon, *c = nk_cap_neon_k; return;
26
+ case nk_kernel_reduce_minmax_k: *m = (m_t)&nk_reduce_minmax_i64_neon, *c = nk_cap_neon_k; return;
27
+ default: break;
28
+ }
29
+ #endif
30
+ #if NK_TARGET_ICELAKE
31
+ if (v & nk_cap_icelake_k) switch (k) {
32
+ case nk_kernel_each_sum_k: *m = (m_t)&nk_each_sum_i64_icelake, *c = nk_cap_icelake_k; return;
33
+ default: break;
34
+ }
35
+ #endif
36
+ #if NK_TARGET_SKYLAKE
37
+ if (v & nk_cap_skylake_k) switch (k) {
38
+ case nk_kernel_each_fma_k: *m = (m_t)&nk_each_fma_i64_skylake, *c = nk_cap_skylake_k; return;
39
+ case nk_kernel_each_scale_k: *m = (m_t)&nk_each_scale_i64_skylake, *c = nk_cap_skylake_k; return;
40
+ case nk_kernel_reduce_moments_k: *m = (m_t)&nk_reduce_moments_i64_skylake, *c = nk_cap_skylake_k; return;
41
+ case nk_kernel_reduce_minmax_k: *m = (m_t)&nk_reduce_minmax_i64_skylake, *c = nk_cap_skylake_k; return;
42
+ default: break;
43
+ }
44
+ #endif
45
+ #if NK_TARGET_HASWELL
46
+ if (v & nk_cap_haswell_k) switch (k) {
47
+ case nk_kernel_reduce_moments_k: *m = (m_t)&nk_reduce_moments_i64_haswell, *c = nk_cap_haswell_k; return;
48
+ case nk_kernel_reduce_minmax_k: *m = (m_t)&nk_reduce_minmax_i64_haswell, *c = nk_cap_haswell_k; return;
49
+ default: break;
50
+ }
51
+ #endif
52
+ #if NK_TARGET_RVV
53
+ if (v & nk_cap_rvv_k) switch (k) {
54
+ case nk_kernel_each_fma_k: *m = (m_t)&nk_each_fma_i64_rvv, *c = nk_cap_rvv_k; return;
55
+ case nk_kernel_each_scale_k: *m = (m_t)&nk_each_scale_i64_rvv, *c = nk_cap_rvv_k; return;
56
+ case nk_kernel_each_sum_k: *m = (m_t)&nk_each_sum_i64_rvv, *c = nk_cap_rvv_k; return;
57
+ case nk_kernel_reduce_moments_k: *m = (m_t)&nk_reduce_moments_i64_rvv, *c = nk_cap_rvv_k; return;
58
+ case nk_kernel_reduce_minmax_k: *m = (m_t)&nk_reduce_minmax_i64_rvv, *c = nk_cap_rvv_k; return;
59
+ default: break;
60
+ }
61
+ #endif
62
+ if (v & nk_cap_serial_k) switch (k) {
63
+ case nk_kernel_each_fma_k: *m = (m_t)&nk_each_fma_i64_serial, *c = nk_cap_serial_k; return;
64
+ case nk_kernel_each_scale_k: *m = (m_t)&nk_each_scale_i64_serial, *c = nk_cap_serial_k; return;
65
+ case nk_kernel_each_sum_k: *m = (m_t)&nk_each_sum_i64_serial, *c = nk_cap_serial_k; return;
66
+ case nk_kernel_each_blend_k: *m = (m_t)&nk_each_blend_i64_serial, *c = nk_cap_serial_k; return;
67
+ case nk_kernel_reduce_moments_k: *m = (m_t)&nk_reduce_moments_i64_serial, *c = nk_cap_serial_k; return;
68
+ case nk_kernel_reduce_minmax_k: *m = (m_t)&nk_reduce_minmax_i64_serial, *c = nk_cap_serial_k; return;
69
+ default: break;
70
+ }
71
+
72
+ // Error fallback - zero capability signals lookup failure
73
+ *m = (m_t)nk_error_dense_, *c = 0;
74
+ }
75
+
76
+ void nk_dispatch_i64_init_(nk_capability_t caps) {
77
+ nk_implementations_t *t = &nk_dispatch_table;
78
+ nk_capability_t used;
79
+
80
+ nk_dispatch_i64_find_(caps, nk_kernel_each_scale_k, (nk_kernel_punned_t *)&t->each_scale_i64, &used);
81
+ nk_dispatch_i64_find_(caps, nk_kernel_each_sum_k, (nk_kernel_punned_t *)&t->each_sum_i64, &used);
82
+ nk_dispatch_i64_find_(caps, nk_kernel_each_blend_k, (nk_kernel_punned_t *)&t->each_blend_i64, &used);
83
+ nk_dispatch_i64_find_(caps, nk_kernel_each_fma_k, (nk_kernel_punned_t *)&t->each_fma_i64, &used);
84
+ nk_dispatch_i64_find_(caps, nk_kernel_reduce_moments_k, (nk_kernel_punned_t *)&t->reduce_moments_i64, &used);
85
+ nk_dispatch_i64_find_(caps, nk_kernel_reduce_minmax_k, (nk_kernel_punned_t *)&t->reduce_minmax_i64, &used);
86
+ }
@@ -0,0 +1,289 @@
1
+ /**
2
+ * @brief Dispatch Initialization for I8 Data Types.
3
+ * @file c/dispatch_i8.c
4
+ * @author Ash Vardanian
5
+ * @date February 3, 2026
6
+ */
7
+ #include "dispatch.h"
8
+
9
+ void nk_dispatch_i8_find_(nk_capability_t v, nk_kernel_kind_t k, nk_kernel_punned_t *m, nk_capability_t *c) {
10
+ typedef nk_kernel_punned_t m_t;
11
+ #if NK_TARGET_V128RELAXED
12
+ if (v & nk_cap_v128relaxed_k) switch (k) {
13
+ case nk_kernel_dot_k: *m = (m_t)&nk_dot_i8_v128relaxed, *c = nk_cap_v128relaxed_k; return;
14
+ case nk_kernel_reduce_moments_k: *m = (m_t)&nk_reduce_moments_i8_v128relaxed, *c = nk_cap_v128relaxed_k; return;
15
+ case nk_kernel_reduce_minmax_k: *m = (m_t)&nk_reduce_minmax_i8_v128relaxed, *c = nk_cap_v128relaxed_k; return;
16
+ case nk_kernel_sqeuclidean_k: *m = (m_t)&nk_sqeuclidean_i8_v128relaxed, *c = nk_cap_v128relaxed_k; return;
17
+ case nk_kernel_euclidean_k: *m = (m_t)&nk_euclidean_i8_v128relaxed, *c = nk_cap_v128relaxed_k; return;
18
+ case nk_kernel_angular_k: *m = (m_t)&nk_angular_i8_v128relaxed, *c = nk_cap_v128relaxed_k; return;
19
+ case nk_kernel_dots_packed_size_k:
20
+ *m = (m_t)&nk_dots_packed_size_i8_v128relaxed, *c = nk_cap_v128relaxed_k;
21
+ return;
22
+ case nk_kernel_dots_pack_k: *m = (m_t)&nk_dots_pack_i8_v128relaxed, *c = nk_cap_v128relaxed_k; return;
23
+ case nk_kernel_dots_packed_k: *m = (m_t)&nk_dots_packed_i8_v128relaxed, *c = nk_cap_v128relaxed_k; return;
24
+ case nk_kernel_dots_symmetric_k: *m = (m_t)&nk_dots_symmetric_i8_v128relaxed, *c = nk_cap_v128relaxed_k; return;
25
+ case nk_kernel_angulars_packed_k:
26
+ *m = (m_t)&nk_angulars_packed_i8_v128relaxed, *c = nk_cap_v128relaxed_k;
27
+ return;
28
+ case nk_kernel_angulars_symmetric_k:
29
+ *m = (m_t)&nk_angulars_symmetric_i8_v128relaxed, *c = nk_cap_v128relaxed_k;
30
+ return;
31
+ case nk_kernel_euclideans_packed_k:
32
+ *m = (m_t)&nk_euclideans_packed_i8_v128relaxed, *c = nk_cap_v128relaxed_k;
33
+ return;
34
+ case nk_kernel_euclideans_symmetric_k:
35
+ *m = (m_t)&nk_euclideans_symmetric_i8_v128relaxed, *c = nk_cap_v128relaxed_k;
36
+ return;
37
+ default: break;
38
+ }
39
+ #endif
40
+ #if NK_TARGET_SME
41
+ if (v & nk_cap_sme_k) switch (k) {
42
+ case nk_kernel_dots_packed_size_k: *m = (m_t)&nk_dots_packed_size_i8_sme, *c = nk_cap_sme_k; return;
43
+ case nk_kernel_dots_pack_k: *m = (m_t)&nk_dots_pack_i8_sme, *c = nk_cap_sme_k; return;
44
+ case nk_kernel_dots_packed_k: *m = (m_t)&nk_dots_packed_i8_sme, *c = nk_cap_sme_k; return;
45
+ case nk_kernel_dots_symmetric_k: *m = (m_t)&nk_dots_symmetric_i8_sme, *c = nk_cap_sme_k; return;
46
+ case nk_kernel_angulars_packed_k: *m = (m_t)&nk_angulars_packed_i8_sme, *c = nk_cap_sme_k; return;
47
+ case nk_kernel_angulars_symmetric_k: *m = (m_t)&nk_angulars_symmetric_i8_sme, *c = nk_cap_sme_k; return;
48
+ case nk_kernel_euclideans_packed_k: *m = (m_t)&nk_euclideans_packed_i8_sme, *c = nk_cap_sme_k; return;
49
+ case nk_kernel_euclideans_symmetric_k: *m = (m_t)&nk_euclideans_symmetric_i8_sme, *c = nk_cap_sme_k; return;
50
+ default: break;
51
+ }
52
+ #endif
53
+ #if NK_TARGET_NEONSDOT
54
+ if (v & nk_cap_neonsdot_k) switch (k) {
55
+ case nk_kernel_dot_k: *m = (m_t)&nk_dot_i8_neonsdot, *c = nk_cap_neonsdot_k; return;
56
+ case nk_kernel_angular_k: *m = (m_t)&nk_angular_i8_neonsdot, *c = nk_cap_neonsdot_k; return;
57
+ case nk_kernel_sqeuclidean_k: *m = (m_t)&nk_sqeuclidean_i8_neonsdot, *c = nk_cap_neonsdot_k; return;
58
+ case nk_kernel_euclidean_k: *m = (m_t)&nk_euclidean_i8_neonsdot, *c = nk_cap_neonsdot_k; return;
59
+ case nk_kernel_dots_packed_size_k: *m = (m_t)&nk_dots_packed_size_i8_neonsdot, *c = nk_cap_neonsdot_k; return;
60
+ case nk_kernel_dots_pack_k: *m = (m_t)&nk_dots_pack_i8_neonsdot, *c = nk_cap_neonsdot_k; return;
61
+ case nk_kernel_dots_packed_k: *m = (m_t)&nk_dots_packed_i8_neonsdot, *c = nk_cap_neonsdot_k; return;
62
+ case nk_kernel_dots_symmetric_k: *m = (m_t)&nk_dots_symmetric_i8_neonsdot, *c = nk_cap_neonsdot_k; return;
63
+ case nk_kernel_angulars_packed_k: *m = (m_t)&nk_angulars_packed_i8_neonsdot, *c = nk_cap_neonsdot_k; return;
64
+ case nk_kernel_angulars_symmetric_k:
65
+ *m = (m_t)&nk_angulars_symmetric_i8_neonsdot, *c = nk_cap_neonsdot_k;
66
+ return;
67
+ case nk_kernel_euclideans_packed_k: *m = (m_t)&nk_euclideans_packed_i8_neonsdot, *c = nk_cap_neonsdot_k; return;
68
+ case nk_kernel_euclideans_symmetric_k:
69
+ *m = (m_t)&nk_euclideans_symmetric_i8_neonsdot, *c = nk_cap_neonsdot_k;
70
+ return;
71
+ case nk_kernel_reduce_moments_k: *m = (m_t)&nk_reduce_moments_i8_neonsdot, *c = nk_cap_neonsdot_k; return;
72
+ default: break;
73
+ }
74
+ #endif
75
+ #if NK_TARGET_NEONHALF
76
+ if (v & nk_cap_neonhalf_k) switch (k) {
77
+ case nk_kernel_each_fma_k: *m = (m_t)&nk_each_fma_i8_neonhalf, *c = nk_cap_neonhalf_k; return;
78
+ case nk_kernel_each_blend_k: *m = (m_t)&nk_each_blend_i8_neonhalf, *c = nk_cap_neonhalf_k; return;
79
+ case nk_kernel_each_scale_k: *m = (m_t)&nk_each_scale_i8_neonhalf, *c = nk_cap_neonhalf_k; return;
80
+ case nk_kernel_each_sum_k: *m = (m_t)&nk_each_sum_i8_neonhalf, *c = nk_cap_neonhalf_k; return;
81
+ default: break;
82
+ }
83
+ #endif
84
+ #if NK_TARGET_NEON
85
+ if (v & nk_cap_neon_k) switch (k) {
86
+ case nk_kernel_reduce_moments_k: *m = (m_t)&nk_reduce_moments_i8_neon, *c = nk_cap_neon_k; return;
87
+ case nk_kernel_reduce_minmax_k: *m = (m_t)&nk_reduce_minmax_i8_neon, *c = nk_cap_neon_k; return;
88
+ default: break;
89
+ }
90
+ #endif
91
+ #if NK_TARGET_SAPPHIREAMX
92
+ if (v & nk_cap_sapphireamx_k) switch (k) {
93
+ case nk_kernel_dots_packed_size_k:
94
+ *m = (m_t)&nk_dots_packed_size_i8_sapphireamx, *c = nk_cap_sapphireamx_k;
95
+ return;
96
+ case nk_kernel_dots_pack_k: *m = (m_t)&nk_dots_pack_i8_sapphireamx, *c = nk_cap_sapphireamx_k; return;
97
+ case nk_kernel_dots_packed_k: *m = (m_t)&nk_dots_packed_i8_sapphireamx, *c = nk_cap_sapphireamx_k; return;
98
+ case nk_kernel_dots_symmetric_k: *m = (m_t)&nk_dots_symmetric_i8_sapphireamx, *c = nk_cap_sapphireamx_k; return;
99
+ case nk_kernel_angulars_packed_k:
100
+ *m = (m_t)&nk_angulars_packed_i8_sapphireamx, *c = nk_cap_sapphireamx_k;
101
+ return;
102
+ case nk_kernel_angulars_symmetric_k:
103
+ *m = (m_t)&nk_angulars_symmetric_i8_sapphireamx, *c = nk_cap_sapphireamx_k;
104
+ return;
105
+ case nk_kernel_euclideans_packed_k:
106
+ *m = (m_t)&nk_euclideans_packed_i8_sapphireamx, *c = nk_cap_sapphireamx_k;
107
+ return;
108
+ case nk_kernel_euclideans_symmetric_k:
109
+ *m = (m_t)&nk_euclideans_symmetric_i8_sapphireamx, *c = nk_cap_sapphireamx_k;
110
+ return;
111
+ default: break;
112
+ }
113
+ #endif
114
+ #if NK_TARGET_SAPPHIRE
115
+ if (v & nk_cap_sapphire_k) switch (k) {
116
+ case nk_kernel_each_fma_k: *m = (m_t)&nk_each_fma_i8_sapphire, *c = nk_cap_sapphire_k; return;
117
+ case nk_kernel_each_blend_k: *m = (m_t)&nk_each_blend_i8_sapphire, *c = nk_cap_sapphire_k; return;
118
+ case nk_kernel_each_scale_k: *m = (m_t)&nk_each_scale_i8_sapphire, *c = nk_cap_sapphire_k; return;
119
+ default: break;
120
+ }
121
+ #endif
122
+ #if NK_TARGET_ICELAKE
123
+ if (v & nk_cap_icelake_k) switch (k) {
124
+ case nk_kernel_dot_k: *m = (m_t)&nk_dot_i8_icelake, *c = nk_cap_icelake_k; return;
125
+ case nk_kernel_angular_k: *m = (m_t)&nk_angular_i8_icelake, *c = nk_cap_icelake_k; return;
126
+ case nk_kernel_sqeuclidean_k: *m = (m_t)&nk_sqeuclidean_i8_icelake, *c = nk_cap_icelake_k; return;
127
+ case nk_kernel_euclidean_k: *m = (m_t)&nk_euclidean_i8_icelake, *c = nk_cap_icelake_k; return;
128
+ case nk_kernel_each_sum_k: *m = (m_t)&nk_each_sum_i8_icelake, *c = nk_cap_icelake_k; return;
129
+ case nk_kernel_dots_packed_size_k: *m = (m_t)&nk_dots_packed_size_i8_icelake, *c = nk_cap_icelake_k; return;
130
+ case nk_kernel_dots_pack_k: *m = (m_t)&nk_dots_pack_i8_icelake, *c = nk_cap_icelake_k; return;
131
+ case nk_kernel_dots_packed_k: *m = (m_t)&nk_dots_packed_i8_icelake, *c = nk_cap_icelake_k; return;
132
+ case nk_kernel_dots_symmetric_k: *m = (m_t)&nk_dots_symmetric_i8_icelake, *c = nk_cap_icelake_k; return;
133
+ case nk_kernel_angulars_packed_k: *m = (m_t)&nk_angulars_packed_i8_icelake, *c = nk_cap_icelake_k; return;
134
+ case nk_kernel_angulars_symmetric_k: *m = (m_t)&nk_angulars_symmetric_i8_icelake, *c = nk_cap_icelake_k; return;
135
+ case nk_kernel_euclideans_packed_k: *m = (m_t)&nk_euclideans_packed_i8_icelake, *c = nk_cap_icelake_k; return;
136
+ case nk_kernel_euclideans_symmetric_k:
137
+ *m = (m_t)&nk_euclideans_symmetric_i8_icelake, *c = nk_cap_icelake_k;
138
+ return;
139
+ case nk_kernel_reduce_moments_k: *m = (m_t)&nk_reduce_moments_i8_icelake, *c = nk_cap_icelake_k; return;
140
+ default: break;
141
+ }
142
+ #endif
143
+ #if NK_TARGET_SKYLAKE
144
+ if (v & nk_cap_skylake_k) switch (k) {
145
+ case nk_kernel_dot_k: *m = (m_t)&nk_dot_i8_skylake, *c = nk_cap_skylake_k; return;
146
+ case nk_kernel_each_fma_k: *m = (m_t)&nk_each_fma_i8_skylake, *c = nk_cap_skylake_k; return;
147
+ case nk_kernel_each_scale_k: *m = (m_t)&nk_each_scale_i8_skylake, *c = nk_cap_skylake_k; return;
148
+ case nk_kernel_reduce_moments_k: *m = (m_t)&nk_reduce_moments_i8_skylake, *c = nk_cap_skylake_k; return;
149
+ case nk_kernel_reduce_minmax_k: *m = (m_t)&nk_reduce_minmax_i8_skylake, *c = nk_cap_skylake_k; return;
150
+ default: break;
151
+ }
152
+ #endif
153
+ #if NK_TARGET_SIERRA
154
+ if (v & nk_cap_sierra_k) switch (k) {
155
+ case nk_kernel_dot_k: *m = (m_t)&nk_dot_i8_sierra, *c = nk_cap_sierra_k; return;
156
+ case nk_kernel_angular_k: *m = (m_t)&nk_angular_i8_sierra, *c = nk_cap_sierra_k; return;
157
+ case nk_kernel_euclidean_k: *m = (m_t)&nk_euclidean_i8_sierra, *c = nk_cap_sierra_k; return;
158
+ case nk_kernel_sqeuclidean_k: *m = (m_t)&nk_sqeuclidean_i8_sierra, *c = nk_cap_sierra_k; return;
159
+ case nk_kernel_reduce_moments_k: *m = (m_t)&nk_reduce_moments_i8_sierra, *c = nk_cap_sierra_k; return;
160
+ case nk_kernel_dots_packed_size_k: *m = (m_t)&nk_dots_packed_size_i8_sierra, *c = nk_cap_sierra_k; return;
161
+ case nk_kernel_dots_pack_k: *m = (m_t)&nk_dots_pack_i8_sierra, *c = nk_cap_sierra_k; return;
162
+ case nk_kernel_dots_packed_k: *m = (m_t)&nk_dots_packed_i8_sierra, *c = nk_cap_sierra_k; return;
163
+ case nk_kernel_dots_symmetric_k: *m = (m_t)&nk_dots_symmetric_i8_sierra, *c = nk_cap_sierra_k; return;
164
+ case nk_kernel_angulars_packed_k: *m = (m_t)&nk_angulars_packed_i8_sierra, *c = nk_cap_sierra_k; return;
165
+ case nk_kernel_angulars_symmetric_k: *m = (m_t)&nk_angulars_symmetric_i8_sierra, *c = nk_cap_sierra_k; return;
166
+ case nk_kernel_euclideans_packed_k: *m = (m_t)&nk_euclideans_packed_i8_sierra, *c = nk_cap_sierra_k; return;
167
+ case nk_kernel_euclideans_symmetric_k:
168
+ *m = (m_t)&nk_euclideans_symmetric_i8_sierra, *c = nk_cap_sierra_k;
169
+ return;
170
+ default: break;
171
+ }
172
+ #endif
173
+ #if NK_TARGET_ALDER
174
+ if (v & nk_cap_alder_k) switch (k) {
175
+ case nk_kernel_dot_k: *m = (m_t)&nk_dot_i8_alder, *c = nk_cap_alder_k; return;
176
+ case nk_kernel_angular_k: *m = (m_t)&nk_angular_i8_alder, *c = nk_cap_alder_k; return;
177
+ case nk_kernel_euclidean_k: *m = (m_t)&nk_euclidean_i8_alder, *c = nk_cap_alder_k; return;
178
+ case nk_kernel_sqeuclidean_k: *m = (m_t)&nk_sqeuclidean_i8_alder, *c = nk_cap_alder_k; return;
179
+ case nk_kernel_dots_packed_size_k: *m = (m_t)&nk_dots_packed_size_i8_alder, *c = nk_cap_alder_k; return;
180
+ case nk_kernel_dots_pack_k: *m = (m_t)&nk_dots_pack_i8_alder, *c = nk_cap_alder_k; return;
181
+ case nk_kernel_dots_packed_k: *m = (m_t)&nk_dots_packed_i8_alder, *c = nk_cap_alder_k; return;
182
+ case nk_kernel_dots_symmetric_k: *m = (m_t)&nk_dots_symmetric_i8_alder, *c = nk_cap_alder_k; return;
183
+ case nk_kernel_angulars_packed_k: *m = (m_t)&nk_angulars_packed_i8_alder, *c = nk_cap_alder_k; return;
184
+ case nk_kernel_angulars_symmetric_k: *m = (m_t)&nk_angulars_symmetric_i8_alder, *c = nk_cap_alder_k; return;
185
+ case nk_kernel_euclideans_packed_k: *m = (m_t)&nk_euclideans_packed_i8_alder, *c = nk_cap_alder_k; return;
186
+ case nk_kernel_euclideans_symmetric_k: *m = (m_t)&nk_euclideans_symmetric_i8_alder, *c = nk_cap_alder_k; return;
187
+ default: break;
188
+ }
189
+ #endif
190
+ #if NK_TARGET_HASWELL
191
+ if (v & nk_cap_haswell_k) switch (k) {
192
+ case nk_kernel_dot_k: *m = (m_t)&nk_dot_i8_haswell, *c = nk_cap_haswell_k; return;
193
+ case nk_kernel_angular_k: *m = (m_t)&nk_angular_i8_haswell, *c = nk_cap_haswell_k; return;
194
+ case nk_kernel_sqeuclidean_k: *m = (m_t)&nk_sqeuclidean_i8_haswell, *c = nk_cap_haswell_k; return;
195
+ case nk_kernel_euclidean_k: *m = (m_t)&nk_euclidean_i8_haswell, *c = nk_cap_haswell_k; return;
196
+ case nk_kernel_each_fma_k: *m = (m_t)&nk_each_fma_i8_haswell, *c = nk_cap_haswell_k; return;
197
+ case nk_kernel_each_blend_k: *m = (m_t)&nk_each_blend_i8_haswell, *c = nk_cap_haswell_k; return;
198
+ case nk_kernel_each_scale_k: *m = (m_t)&nk_each_scale_i8_haswell, *c = nk_cap_haswell_k; return;
199
+ case nk_kernel_each_sum_k: *m = (m_t)&nk_each_sum_i8_haswell, *c = nk_cap_haswell_k; return;
200
+ case nk_kernel_reduce_moments_k: *m = (m_t)&nk_reduce_moments_i8_haswell, *c = nk_cap_haswell_k; return;
201
+ case nk_kernel_reduce_minmax_k: *m = (m_t)&nk_reduce_minmax_i8_haswell, *c = nk_cap_haswell_k; return;
202
+ case nk_kernel_dots_packed_size_k: *m = (m_t)&nk_dots_packed_size_i8_haswell, *c = nk_cap_haswell_k; return;
203
+ case nk_kernel_dots_pack_k: *m = (m_t)&nk_dots_pack_i8_haswell, *c = nk_cap_haswell_k; return;
204
+ case nk_kernel_dots_packed_k: *m = (m_t)&nk_dots_packed_i8_haswell, *c = nk_cap_haswell_k; return;
205
+ case nk_kernel_dots_symmetric_k: *m = (m_t)&nk_dots_symmetric_i8_haswell, *c = nk_cap_haswell_k; return;
206
+ case nk_kernel_angulars_packed_k: *m = (m_t)&nk_angulars_packed_i8_haswell, *c = nk_cap_haswell_k; return;
207
+ case nk_kernel_angulars_symmetric_k: *m = (m_t)&nk_angulars_symmetric_i8_haswell, *c = nk_cap_haswell_k; return;
208
+ case nk_kernel_euclideans_packed_k: *m = (m_t)&nk_euclideans_packed_i8_haswell, *c = nk_cap_haswell_k; return;
209
+ case nk_kernel_euclideans_symmetric_k:
210
+ *m = (m_t)&nk_euclideans_symmetric_i8_haswell, *c = nk_cap_haswell_k;
211
+ return;
212
+ default: break;
213
+ }
214
+ #endif
215
+ #if NK_TARGET_RVV
216
+ if (v & nk_cap_rvv_k) switch (k) {
217
+ case nk_kernel_dot_k: *m = (m_t)&nk_dot_i8_rvv, *c = nk_cap_rvv_k; return;
218
+ case nk_kernel_angular_k: *m = (m_t)&nk_angular_i8_rvv, *c = nk_cap_rvv_k; return;
219
+ case nk_kernel_sqeuclidean_k: *m = (m_t)&nk_sqeuclidean_i8_rvv, *c = nk_cap_rvv_k; return;
220
+ case nk_kernel_euclidean_k: *m = (m_t)&nk_euclidean_i8_rvv, *c = nk_cap_rvv_k; return;
221
+ case nk_kernel_reduce_moments_k: *m = (m_t)&nk_reduce_moments_i8_rvv, *c = nk_cap_rvv_k; return;
222
+ case nk_kernel_reduce_minmax_k: *m = (m_t)&nk_reduce_minmax_i8_rvv, *c = nk_cap_rvv_k; return;
223
+ case nk_kernel_dots_packed_size_k: *m = (m_t)&nk_dots_packed_size_i8_rvv, *c = nk_cap_rvv_k; return;
224
+ case nk_kernel_dots_pack_k: *m = (m_t)&nk_dots_pack_i8_rvv, *c = nk_cap_rvv_k; return;
225
+ case nk_kernel_dots_packed_k: *m = (m_t)&nk_dots_packed_i8_rvv, *c = nk_cap_rvv_k; return;
226
+ case nk_kernel_dots_symmetric_k: *m = (m_t)&nk_dots_symmetric_i8_rvv, *c = nk_cap_rvv_k; return;
227
+ case nk_kernel_angulars_packed_k: *m = (m_t)&nk_angulars_packed_i8_rvv, *c = nk_cap_rvv_k; return;
228
+ case nk_kernel_angulars_symmetric_k: *m = (m_t)&nk_angulars_symmetric_i8_rvv, *c = nk_cap_rvv_k; return;
229
+ case nk_kernel_euclideans_packed_k: *m = (m_t)&nk_euclideans_packed_i8_rvv, *c = nk_cap_rvv_k; return;
230
+ case nk_kernel_euclideans_symmetric_k: *m = (m_t)&nk_euclideans_symmetric_i8_rvv, *c = nk_cap_rvv_k; return;
231
+ case nk_kernel_each_fma_k: *m = (m_t)&nk_each_fma_i8_rvv, *c = nk_cap_rvv_k; return;
232
+ case nk_kernel_each_blend_k: *m = (m_t)&nk_each_blend_i8_rvv, *c = nk_cap_rvv_k; return;
233
+ case nk_kernel_each_scale_k: *m = (m_t)&nk_each_scale_i8_rvv, *c = nk_cap_rvv_k; return;
234
+ case nk_kernel_each_sum_k: *m = (m_t)&nk_each_sum_i8_rvv, *c = nk_cap_rvv_k; return;
235
+ default: break;
236
+ }
237
+ #endif
238
+ if (v & nk_cap_serial_k) switch (k) {
239
+ case nk_kernel_dot_k: *m = (m_t)&nk_dot_i8_serial, *c = nk_cap_serial_k; return;
240
+ case nk_kernel_angular_k: *m = (m_t)&nk_angular_i8_serial, *c = nk_cap_serial_k; return;
241
+ case nk_kernel_sqeuclidean_k: *m = (m_t)&nk_sqeuclidean_i8_serial, *c = nk_cap_serial_k; return;
242
+ case nk_kernel_euclidean_k: *m = (m_t)&nk_euclidean_i8_serial, *c = nk_cap_serial_k; return;
243
+ case nk_kernel_each_fma_k: *m = (m_t)&nk_each_fma_i8_serial, *c = nk_cap_serial_k; return;
244
+ case nk_kernel_each_blend_k: *m = (m_t)&nk_each_blend_i8_serial, *c = nk_cap_serial_k; return;
245
+ case nk_kernel_each_scale_k: *m = (m_t)&nk_each_scale_i8_serial, *c = nk_cap_serial_k; return;
246
+ case nk_kernel_each_sum_k: *m = (m_t)&nk_each_sum_i8_serial, *c = nk_cap_serial_k; return;
247
+ case nk_kernel_reduce_moments_k: *m = (m_t)&nk_reduce_moments_i8_serial, *c = nk_cap_serial_k; return;
248
+ case nk_kernel_reduce_minmax_k: *m = (m_t)&nk_reduce_minmax_i8_serial, *c = nk_cap_serial_k; return;
249
+ case nk_kernel_dots_packed_size_k: *m = (m_t)&nk_dots_packed_size_i8_serial, *c = nk_cap_serial_k; return;
250
+ case nk_kernel_dots_pack_k: *m = (m_t)&nk_dots_pack_i8_serial, *c = nk_cap_serial_k; return;
251
+ case nk_kernel_dots_packed_k: *m = (m_t)&nk_dots_packed_i8_serial, *c = nk_cap_serial_k; return;
252
+ case nk_kernel_dots_symmetric_k: *m = (m_t)&nk_dots_symmetric_i8_serial, *c = nk_cap_serial_k; return;
253
+ case nk_kernel_angulars_packed_k: *m = (m_t)&nk_angulars_packed_i8_serial, *c = nk_cap_serial_k; return;
254
+ case nk_kernel_angulars_symmetric_k: *m = (m_t)&nk_angulars_symmetric_i8_serial, *c = nk_cap_serial_k; return;
255
+ case nk_kernel_euclideans_packed_k: *m = (m_t)&nk_euclideans_packed_i8_serial, *c = nk_cap_serial_k; return;
256
+ case nk_kernel_euclideans_symmetric_k:
257
+ *m = (m_t)&nk_euclideans_symmetric_i8_serial, *c = nk_cap_serial_k;
258
+ return;
259
+ default: break;
260
+ }
261
+
262
+ // Error fallback - zero capability signals lookup failure
263
+ *m = (m_t)nk_error_dense_, *c = 0;
264
+ }
265
+
266
+ void nk_dispatch_i8_init_(nk_capability_t caps) {
267
+ nk_implementations_t *t = &nk_dispatch_table;
268
+ nk_capability_t used;
269
+
270
+ nk_dispatch_i8_find_(caps, nk_kernel_dot_k, (nk_kernel_punned_t *)&t->dot_i8, &used);
271
+ nk_dispatch_i8_find_(caps, nk_kernel_angular_k, (nk_kernel_punned_t *)&t->angular_i8, &used);
272
+ nk_dispatch_i8_find_(caps, nk_kernel_euclidean_k, (nk_kernel_punned_t *)&t->euclidean_i8, &used);
273
+ nk_dispatch_i8_find_(caps, nk_kernel_sqeuclidean_k, (nk_kernel_punned_t *)&t->sqeuclidean_i8, &used);
274
+ nk_dispatch_i8_find_(caps, nk_kernel_each_scale_k, (nk_kernel_punned_t *)&t->each_scale_i8, &used);
275
+ nk_dispatch_i8_find_(caps, nk_kernel_each_sum_k, (nk_kernel_punned_t *)&t->each_sum_i8, &used);
276
+ nk_dispatch_i8_find_(caps, nk_kernel_each_blend_k, (nk_kernel_punned_t *)&t->each_blend_i8, &used);
277
+ nk_dispatch_i8_find_(caps, nk_kernel_each_fma_k, (nk_kernel_punned_t *)&t->each_fma_i8, &used);
278
+ nk_dispatch_i8_find_(caps, nk_kernel_reduce_moments_k, (nk_kernel_punned_t *)&t->reduce_moments_i8, &used);
279
+ nk_dispatch_i8_find_(caps, nk_kernel_reduce_minmax_k, (nk_kernel_punned_t *)&t->reduce_minmax_i8, &used);
280
+ nk_dispatch_i8_find_(caps, nk_kernel_dots_packed_size_k, (nk_kernel_punned_t *)&t->dots_packed_size_i8, &used);
281
+ nk_dispatch_i8_find_(caps, nk_kernel_dots_pack_k, (nk_kernel_punned_t *)&t->dots_pack_i8, &used);
282
+ nk_dispatch_i8_find_(caps, nk_kernel_dots_packed_k, (nk_kernel_punned_t *)&t->dots_packed_i8, &used);
283
+ nk_dispatch_i8_find_(caps, nk_kernel_dots_symmetric_k, (nk_kernel_punned_t *)&t->dots_symmetric_i8, &used);
284
+ nk_dispatch_i8_find_(caps, nk_kernel_angulars_packed_k, (nk_kernel_punned_t *)&t->angulars_packed_i8, &used);
285
+ nk_dispatch_i8_find_(caps, nk_kernel_angulars_symmetric_k, (nk_kernel_punned_t *)&t->angulars_symmetric_i8, &used);
286
+ nk_dispatch_i8_find_(caps, nk_kernel_euclideans_packed_k, (nk_kernel_punned_t *)&t->euclideans_packed_i8, &used);
287
+ nk_dispatch_i8_find_(caps, nk_kernel_euclideans_symmetric_k, (nk_kernel_punned_t *)&t->euclideans_symmetric_i8,
288
+ &used);
289
+ }