numkong 7.0.0 → 7.4.2

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (315):
  1. package/README.md +197 -124
  2. package/binding.gyp +34 -484
  3. package/c/dispatch_bf16.c +59 -1
  4. package/c/dispatch_e2m3.c +41 -8
  5. package/c/dispatch_e3m2.c +49 -8
  6. package/c/dispatch_e4m3.c +51 -9
  7. package/c/dispatch_e5m2.c +45 -1
  8. package/c/dispatch_f16.c +79 -26
  9. package/c/dispatch_f16c.c +5 -5
  10. package/c/dispatch_f32.c +56 -0
  11. package/c/dispatch_f64.c +52 -0
  12. package/c/dispatch_i4.c +3 -0
  13. package/c/dispatch_i8.c +62 -3
  14. package/c/dispatch_other.c +18 -0
  15. package/c/dispatch_u1.c +54 -9
  16. package/c/dispatch_u4.c +3 -0
  17. package/c/dispatch_u8.c +64 -3
  18. package/c/numkong.c +3 -0
  19. package/include/README.md +79 -9
  20. package/include/numkong/attention/sapphireamx.h +278 -276
  21. package/include/numkong/attention/sme.h +983 -977
  22. package/include/numkong/attention.h +1 -1
  23. package/include/numkong/capabilities.h +289 -94
  24. package/include/numkong/cast/README.md +40 -40
  25. package/include/numkong/cast/diamond.h +64 -0
  26. package/include/numkong/cast/haswell.h +42 -194
  27. package/include/numkong/cast/icelake.h +42 -37
  28. package/include/numkong/cast/loongsonasx.h +252 -0
  29. package/include/numkong/cast/neon.h +216 -249
  30. package/include/numkong/cast/powervsx.h +449 -0
  31. package/include/numkong/cast/rvv.h +223 -274
  32. package/include/numkong/cast/sapphire.h +18 -18
  33. package/include/numkong/cast/serial.h +1018 -944
  34. package/include/numkong/cast/skylake.h +82 -23
  35. package/include/numkong/cast/v128relaxed.h +462 -105
  36. package/include/numkong/cast.h +24 -0
  37. package/include/numkong/cast.hpp +44 -0
  38. package/include/numkong/curved/README.md +17 -17
  39. package/include/numkong/curved/neon.h +131 -7
  40. package/include/numkong/curved/neonbfdot.h +6 -7
  41. package/include/numkong/curved/rvv.h +26 -26
  42. package/include/numkong/curved/smef64.h +186 -182
  43. package/include/numkong/curved.h +14 -18
  44. package/include/numkong/dot/README.md +154 -137
  45. package/include/numkong/dot/alder.h +43 -43
  46. package/include/numkong/dot/diamond.h +158 -0
  47. package/include/numkong/dot/genoa.h +4 -30
  48. package/include/numkong/dot/haswell.h +215 -180
  49. package/include/numkong/dot/icelake.h +190 -76
  50. package/include/numkong/dot/loongsonasx.h +671 -0
  51. package/include/numkong/dot/neon.h +124 -73
  52. package/include/numkong/dot/neonbfdot.h +11 -12
  53. package/include/numkong/dot/neonfhm.h +44 -46
  54. package/include/numkong/dot/neonfp8.h +323 -0
  55. package/include/numkong/dot/neonsdot.h +190 -76
  56. package/include/numkong/dot/powervsx.h +752 -0
  57. package/include/numkong/dot/rvv.h +92 -84
  58. package/include/numkong/dot/rvvbf16.h +12 -12
  59. package/include/numkong/dot/rvvhalf.h +12 -12
  60. package/include/numkong/dot/sapphire.h +4 -4
  61. package/include/numkong/dot/serial.h +66 -30
  62. package/include/numkong/dot/sierra.h +31 -31
  63. package/include/numkong/dot/skylake.h +142 -110
  64. package/include/numkong/dot/sve.h +217 -177
  65. package/include/numkong/dot/svebfdot.h +10 -10
  66. package/include/numkong/dot/svehalf.h +85 -41
  67. package/include/numkong/dot/svesdot.h +89 -0
  68. package/include/numkong/dot/v128relaxed.h +124 -89
  69. package/include/numkong/dot.h +114 -48
  70. package/include/numkong/dots/README.md +203 -203
  71. package/include/numkong/dots/alder.h +12 -9
  72. package/include/numkong/dots/diamond.h +86 -0
  73. package/include/numkong/dots/genoa.h +10 -4
  74. package/include/numkong/dots/haswell.h +63 -48
  75. package/include/numkong/dots/icelake.h +27 -18
  76. package/include/numkong/dots/loongsonasx.h +176 -0
  77. package/include/numkong/dots/neon.h +14 -11
  78. package/include/numkong/dots/neonbfdot.h +4 -3
  79. package/include/numkong/dots/neonfhm.h +11 -9
  80. package/include/numkong/dots/neonfp8.h +99 -0
  81. package/include/numkong/dots/neonsdot.h +48 -12
  82. package/include/numkong/dots/powervsx.h +194 -0
  83. package/include/numkong/dots/rvv.h +451 -344
  84. package/include/numkong/dots/sapphireamx.h +1028 -984
  85. package/include/numkong/dots/serial.h +213 -197
  86. package/include/numkong/dots/sierra.h +10 -7
  87. package/include/numkong/dots/skylake.h +47 -36
  88. package/include/numkong/dots/sme.h +2001 -2364
  89. package/include/numkong/dots/smebi32.h +175 -162
  90. package/include/numkong/dots/smef64.h +328 -323
  91. package/include/numkong/dots/v128relaxed.h +64 -41
  92. package/include/numkong/dots.h +573 -293
  93. package/include/numkong/dots.hpp +45 -43
  94. package/include/numkong/each/README.md +133 -137
  95. package/include/numkong/each/haswell.h +6 -6
  96. package/include/numkong/each/icelake.h +7 -7
  97. package/include/numkong/each/neon.h +76 -42
  98. package/include/numkong/each/neonbfdot.h +11 -12
  99. package/include/numkong/each/neonhalf.h +24 -116
  100. package/include/numkong/each/rvv.h +28 -28
  101. package/include/numkong/each/sapphire.h +27 -161
  102. package/include/numkong/each/serial.h +6 -6
  103. package/include/numkong/each/skylake.h +7 -7
  104. package/include/numkong/each/v128relaxed.h +562 -0
  105. package/include/numkong/each.h +148 -62
  106. package/include/numkong/each.hpp +2 -2
  107. package/include/numkong/geospatial/README.md +18 -18
  108. package/include/numkong/geospatial/haswell.h +365 -325
  109. package/include/numkong/geospatial/neon.h +350 -306
  110. package/include/numkong/geospatial/rvv.h +4 -4
  111. package/include/numkong/geospatial/skylake.h +376 -340
  112. package/include/numkong/geospatial/v128relaxed.h +366 -327
  113. package/include/numkong/geospatial.h +17 -17
  114. package/include/numkong/matrix.hpp +4 -4
  115. package/include/numkong/maxsim/README.md +14 -14
  116. package/include/numkong/maxsim/alder.h +6 -6
  117. package/include/numkong/maxsim/genoa.h +4 -4
  118. package/include/numkong/maxsim/haswell.h +6 -6
  119. package/include/numkong/maxsim/icelake.h +18 -18
  120. package/include/numkong/maxsim/neonsdot.h +21 -21
  121. package/include/numkong/maxsim/sapphireamx.h +14 -14
  122. package/include/numkong/maxsim/serial.h +6 -6
  123. package/include/numkong/maxsim/sme.h +221 -196
  124. package/include/numkong/maxsim/v128relaxed.h +6 -6
  125. package/include/numkong/mesh/README.md +62 -56
  126. package/include/numkong/mesh/haswell.h +339 -464
  127. package/include/numkong/mesh/neon.h +1100 -519
  128. package/include/numkong/mesh/neonbfdot.h +36 -68
  129. package/include/numkong/mesh/rvv.h +530 -435
  130. package/include/numkong/mesh/serial.h +75 -91
  131. package/include/numkong/mesh/skylake.h +1627 -302
  132. package/include/numkong/mesh/v128relaxed.h +443 -330
  133. package/include/numkong/mesh.h +63 -49
  134. package/include/numkong/mesh.hpp +4 -4
  135. package/include/numkong/numkong.h +3 -3
  136. package/include/numkong/numkong.hpp +1 -0
  137. package/include/numkong/probability/README.md +23 -19
  138. package/include/numkong/probability/neon.h +82 -52
  139. package/include/numkong/probability/rvv.h +28 -23
  140. package/include/numkong/probability/serial.h +51 -39
  141. package/include/numkong/probability.h +20 -23
  142. package/include/numkong/random.h +1 -1
  143. package/include/numkong/reduce/README.md +143 -138
  144. package/include/numkong/reduce/alder.h +81 -77
  145. package/include/numkong/reduce/haswell.h +222 -220
  146. package/include/numkong/reduce/neon.h +629 -519
  147. package/include/numkong/reduce/neonbfdot.h +7 -218
  148. package/include/numkong/reduce/neonfhm.h +9 -381
  149. package/include/numkong/reduce/neonsdot.h +9 -9
  150. package/include/numkong/reduce/rvv.h +928 -802
  151. package/include/numkong/reduce/serial.h +23 -27
  152. package/include/numkong/reduce/sierra.h +20 -20
  153. package/include/numkong/reduce/skylake.h +326 -324
  154. package/include/numkong/reduce/v128relaxed.h +52 -52
  155. package/include/numkong/reduce.h +4 -23
  156. package/include/numkong/reduce.hpp +156 -11
  157. package/include/numkong/scalar/README.md +6 -6
  158. package/include/numkong/scalar/haswell.h +26 -17
  159. package/include/numkong/scalar/loongsonasx.h +74 -0
  160. package/include/numkong/scalar/neon.h +9 -9
  161. package/include/numkong/scalar/powervsx.h +96 -0
  162. package/include/numkong/scalar/rvv.h +2 -2
  163. package/include/numkong/scalar/sapphire.h +21 -10
  164. package/include/numkong/scalar/serial.h +21 -21
  165. package/include/numkong/scalar.h +13 -0
  166. package/include/numkong/set/README.md +28 -28
  167. package/include/numkong/set/haswell.h +12 -12
  168. package/include/numkong/set/icelake.h +14 -14
  169. package/include/numkong/set/loongsonasx.h +181 -0
  170. package/include/numkong/set/neon.h +17 -18
  171. package/include/numkong/set/powervsx.h +326 -0
  172. package/include/numkong/set/rvv.h +4 -4
  173. package/include/numkong/set/serial.h +6 -6
  174. package/include/numkong/set/sve.h +60 -59
  175. package/include/numkong/set/v128relaxed.h +6 -6
  176. package/include/numkong/set.h +21 -7
  177. package/include/numkong/sets/README.md +26 -26
  178. package/include/numkong/sets/loongsonasx.h +52 -0
  179. package/include/numkong/sets/powervsx.h +65 -0
  180. package/include/numkong/sets/smebi32.h +395 -364
  181. package/include/numkong/sets.h +83 -40
  182. package/include/numkong/sparse/README.md +4 -4
  183. package/include/numkong/sparse/icelake.h +101 -101
  184. package/include/numkong/sparse/serial.h +1 -1
  185. package/include/numkong/sparse/sve2.h +137 -141
  186. package/include/numkong/sparse/turin.h +12 -12
  187. package/include/numkong/sparse.h +10 -10
  188. package/include/numkong/spatial/README.md +230 -226
  189. package/include/numkong/spatial/alder.h +113 -116
  190. package/include/numkong/spatial/diamond.h +240 -0
  191. package/include/numkong/spatial/genoa.h +0 -68
  192. package/include/numkong/spatial/haswell.h +74 -55
  193. package/include/numkong/spatial/icelake.h +539 -58
  194. package/include/numkong/spatial/loongsonasx.h +483 -0
  195. package/include/numkong/spatial/neon.h +125 -52
  196. package/include/numkong/spatial/neonbfdot.h +8 -9
  197. package/include/numkong/spatial/neonfp8.h +258 -0
  198. package/include/numkong/spatial/neonsdot.h +180 -12
  199. package/include/numkong/spatial/powervsx.h +738 -0
  200. package/include/numkong/spatial/rvv.h +146 -139
  201. package/include/numkong/spatial/rvvbf16.h +17 -12
  202. package/include/numkong/spatial/rvvhalf.h +13 -10
  203. package/include/numkong/spatial/serial.h +13 -12
  204. package/include/numkong/spatial/sierra.h +232 -39
  205. package/include/numkong/spatial/skylake.h +73 -74
  206. package/include/numkong/spatial/sve.h +93 -72
  207. package/include/numkong/spatial/svebfdot.h +29 -29
  208. package/include/numkong/spatial/svehalf.h +52 -26
  209. package/include/numkong/spatial/svesdot.h +142 -0
  210. package/include/numkong/spatial/v128relaxed.h +293 -41
  211. package/include/numkong/spatial.h +338 -82
  212. package/include/numkong/spatials/README.md +194 -194
  213. package/include/numkong/spatials/diamond.h +82 -0
  214. package/include/numkong/spatials/haswell.h +2 -2
  215. package/include/numkong/spatials/loongsonasx.h +153 -0
  216. package/include/numkong/spatials/neonfp8.h +111 -0
  217. package/include/numkong/spatials/neonsdot.h +34 -0
  218. package/include/numkong/spatials/powervsx.h +153 -0
  219. package/include/numkong/spatials/rvv.h +259 -243
  220. package/include/numkong/spatials/sapphireamx.h +173 -173
  221. package/include/numkong/spatials/serial.h +2 -2
  222. package/include/numkong/spatials/skylake.h +2 -2
  223. package/include/numkong/spatials/sme.h +590 -605
  224. package/include/numkong/spatials/smef64.h +139 -130
  225. package/include/numkong/spatials/v128relaxed.h +2 -2
  226. package/include/numkong/spatials.h +820 -500
  227. package/include/numkong/spatials.hpp +49 -48
  228. package/include/numkong/tensor.hpp +406 -17
  229. package/include/numkong/trigonometry/README.md +19 -19
  230. package/include/numkong/trigonometry/haswell.h +402 -401
  231. package/include/numkong/trigonometry/neon.h +386 -387
  232. package/include/numkong/trigonometry/rvv.h +52 -51
  233. package/include/numkong/trigonometry/serial.h +13 -13
  234. package/include/numkong/trigonometry/skylake.h +373 -369
  235. package/include/numkong/trigonometry/v128relaxed.h +375 -374
  236. package/include/numkong/trigonometry.h +13 -13
  237. package/include/numkong/trigonometry.hpp +2 -2
  238. package/include/numkong/types.h +287 -49
  239. package/include/numkong/types.hpp +436 -12
  240. package/include/numkong/vector.hpp +82 -14
  241. package/javascript/dist/cjs/numkong-wasm.js +6 -12
  242. package/javascript/dist/cjs/numkong.d.ts +7 -1
  243. package/javascript/dist/cjs/numkong.js +37 -11
  244. package/javascript/dist/cjs/types.d.ts +9 -0
  245. package/javascript/dist/cjs/types.js +96 -0
  246. package/javascript/dist/esm/numkong-browser.d.ts +14 -0
  247. package/javascript/dist/esm/numkong-browser.js +23 -0
  248. package/javascript/dist/esm/numkong-wasm.js +6 -12
  249. package/javascript/dist/esm/numkong.d.ts +7 -1
  250. package/javascript/dist/esm/numkong.js +37 -11
  251. package/javascript/dist/esm/types.d.ts +9 -0
  252. package/javascript/dist/esm/types.js +96 -0
  253. package/javascript/node-gyp-build.d.ts +4 -1
  254. package/javascript/numkong-browser.ts +40 -0
  255. package/javascript/numkong-wasm.ts +7 -13
  256. package/javascript/numkong.c +5 -26
  257. package/javascript/numkong.ts +36 -11
  258. package/javascript/tsconfig-base.json +1 -0
  259. package/javascript/tsconfig-cjs.json +6 -1
  260. package/javascript/types.ts +110 -0
  261. package/numkong.gypi +101 -0
  262. package/package.json +34 -13
  263. package/probes/arm_neon.c +8 -0
  264. package/probes/arm_neon_bfdot.c +9 -0
  265. package/probes/arm_neon_fhm.c +9 -0
  266. package/probes/arm_neon_half.c +8 -0
  267. package/probes/arm_neon_sdot.c +9 -0
  268. package/probes/arm_neonfp8.c +9 -0
  269. package/probes/arm_sme.c +16 -0
  270. package/probes/arm_sme2.c +16 -0
  271. package/probes/arm_sme2p1.c +16 -0
  272. package/probes/arm_sme_bf16.c +16 -0
  273. package/probes/arm_sme_bi32.c +16 -0
  274. package/probes/arm_sme_f64.c +16 -0
  275. package/probes/arm_sme_fa64.c +14 -0
  276. package/probes/arm_sme_half.c +16 -0
  277. package/probes/arm_sme_lut2.c +15 -0
  278. package/probes/arm_sve.c +18 -0
  279. package/probes/arm_sve2.c +20 -0
  280. package/probes/arm_sve2p1.c +18 -0
  281. package/probes/arm_sve_bfdot.c +20 -0
  282. package/probes/arm_sve_half.c +18 -0
  283. package/probes/arm_sve_sdot.c +21 -0
  284. package/probes/loongarch_lasx.c +12 -0
  285. package/probes/power_vsx.c +12 -0
  286. package/probes/probe.js +127 -0
  287. package/probes/riscv_rvv.c +14 -0
  288. package/probes/riscv_rvv_bb.c +15 -0
  289. package/probes/riscv_rvv_bf16.c +17 -0
  290. package/probes/riscv_rvv_half.c +14 -0
  291. package/probes/wasm_v128relaxed.c +11 -0
  292. package/probes/x86_alder.c +17 -0
  293. package/probes/x86_diamond.c +17 -0
  294. package/probes/x86_genoa.c +17 -0
  295. package/probes/x86_graniteamx.c +19 -0
  296. package/probes/x86_haswell.c +11 -0
  297. package/probes/x86_icelake.c +17 -0
  298. package/probes/x86_sapphire.c +16 -0
  299. package/probes/x86_sapphireamx.c +18 -0
  300. package/probes/x86_sierra.c +17 -0
  301. package/probes/x86_skylake.c +15 -0
  302. package/probes/x86_turin.c +17 -0
  303. package/wasm/numkong-emscripten.js +2 -0
  304. package/wasm/numkong.d.ts +14 -0
  305. package/wasm/numkong.js +1124 -0
  306. package/wasm/numkong.wasm +0 -0
  307. package/include/numkong/curved/neonhalf.h +0 -212
  308. package/include/numkong/dot/neonhalf.h +0 -198
  309. package/include/numkong/dots/neonhalf.h +0 -57
  310. package/include/numkong/mesh/neonhalf.h +0 -616
  311. package/include/numkong/reduce/neonhalf.h +0 -157
  312. package/include/numkong/spatial/neonhalf.h +0 -118
  313. package/include/numkong/spatial/sapphire.h +0 -343
  314. package/include/numkong/spatials/neonhalf.h +0 -58
  315. package/javascript/README.md +0 -246
package/c/dispatch_e2m3.c CHANGED
@@ -15,6 +15,9 @@ void nk_dispatch_e2m3_find_(nk_capability_t v, nk_kernel_kind_t k, nk_kernel_pun
15
15
  #if NK_TARGET_V128RELAXED
16
16
  if (v & nk_cap_v128relaxed_k) switch (k) {
17
17
  case nk_kernel_dot_k: *m = (m_t)&nk_dot_e2m3_v128relaxed, *c = nk_cap_v128relaxed_k; return;
18
+ case nk_kernel_angular_k: *m = (m_t)&nk_angular_e2m3_v128relaxed, *c = nk_cap_v128relaxed_k; return;
19
+ case nk_kernel_euclidean_k: *m = (m_t)&nk_euclidean_e2m3_v128relaxed, *c = nk_cap_v128relaxed_k; return;
20
+ case nk_kernel_sqeuclidean_k: *m = (m_t)&nk_sqeuclidean_e2m3_v128relaxed, *c = nk_cap_v128relaxed_k; return;
18
21
  case nk_kernel_reduce_moments_k:
19
22
  *m = (m_t)&nk_reduce_moments_e2m3_v128relaxed, *c = nk_cap_v128relaxed_k;
20
23
  return;
@@ -55,10 +58,45 @@ void nk_dispatch_e2m3_find_(nk_capability_t v, nk_kernel_kind_t k, nk_kernel_pun
55
58
  default: break;
56
59
  }
57
60
  #endif
61
+ #if NK_TARGET_NEONFP8
62
+ if (v & nk_cap_neonfp8_k) switch (k) {
63
+ case nk_kernel_dot_k: *m = (m_t)&nk_dot_e2m3_neonfp8, *c = nk_cap_neonfp8_k; return;
64
+ case nk_kernel_angular_k: *m = (m_t)&nk_angular_e2m3_neonfp8, *c = nk_cap_neonfp8_k; return;
65
+ case nk_kernel_euclidean_k: *m = (m_t)&nk_euclidean_e2m3_neonfp8, *c = nk_cap_neonfp8_k; return;
66
+ case nk_kernel_sqeuclidean_k: *m = (m_t)&nk_sqeuclidean_e2m3_neonfp8, *c = nk_cap_neonfp8_k; return;
67
+ case nk_kernel_dots_packed_size_k: *m = (m_t)&nk_dots_packed_size_e2m3_neonfp8, *c = nk_cap_neonfp8_k; return;
68
+ case nk_kernel_dots_pack_k: *m = (m_t)&nk_dots_pack_e2m3_neonfp8, *c = nk_cap_neonfp8_k; return;
69
+ case nk_kernel_dots_packed_k: *m = (m_t)&nk_dots_packed_e2m3_neonfp8, *c = nk_cap_neonfp8_k; return;
70
+ case nk_kernel_dots_symmetric_k: *m = (m_t)&nk_dots_symmetric_e2m3_neonfp8, *c = nk_cap_neonfp8_k; return;
71
+ case nk_kernel_angulars_packed_k: *m = (m_t)&nk_angulars_packed_e2m3_neonfp8, *c = nk_cap_neonfp8_k; return;
72
+ case nk_kernel_angulars_symmetric_k:
73
+ *m = (m_t)&nk_angulars_symmetric_e2m3_neonfp8, *c = nk_cap_neonfp8_k;
74
+ return;
75
+ case nk_kernel_euclideans_packed_k: *m = (m_t)&nk_euclideans_packed_e2m3_neonfp8, *c = nk_cap_neonfp8_k; return;
76
+ case nk_kernel_euclideans_symmetric_k:
77
+ *m = (m_t)&nk_euclideans_symmetric_e2m3_neonfp8, *c = nk_cap_neonfp8_k;
78
+ return;
79
+ default: break;
80
+ }
81
+ #endif
58
82
  #if NK_TARGET_NEONSDOT
59
83
  if (v & nk_cap_neonsdot_k) switch (k) {
60
84
  case nk_kernel_dot_k: *m = (m_t)&nk_dot_e2m3_neonsdot, *c = nk_cap_neonsdot_k; return;
61
85
  case nk_kernel_reduce_moments_k: *m = (m_t)&nk_reduce_moments_e2m3_neonsdot, *c = nk_cap_neonsdot_k; return;
86
+ case nk_kernel_dots_packed_size_k: *m = (m_t)&nk_dots_packed_size_e2m3_neonsdot, *c = nk_cap_neonsdot_k; return;
87
+ case nk_kernel_dots_pack_k: *m = (m_t)&nk_dots_pack_e2m3_neonsdot, *c = nk_cap_neonsdot_k; return;
88
+ case nk_kernel_dots_packed_k: *m = (m_t)&nk_dots_packed_e2m3_neonsdot, *c = nk_cap_neonsdot_k; return;
89
+ case nk_kernel_dots_symmetric_k: *m = (m_t)&nk_dots_symmetric_e2m3_neonsdot, *c = nk_cap_neonsdot_k; return;
90
+ case nk_kernel_angulars_packed_k: *m = (m_t)&nk_angulars_packed_e2m3_neonsdot, *c = nk_cap_neonsdot_k; return;
91
+ case nk_kernel_angulars_symmetric_k:
92
+ *m = (m_t)&nk_angulars_symmetric_e2m3_neonsdot, *c = nk_cap_neonsdot_k;
93
+ return;
94
+ case nk_kernel_euclideans_packed_k:
95
+ *m = (m_t)&nk_euclideans_packed_e2m3_neonsdot, *c = nk_cap_neonsdot_k;
96
+ return;
97
+ case nk_kernel_euclideans_symmetric_k:
98
+ *m = (m_t)&nk_euclideans_symmetric_e2m3_neonsdot, *c = nk_cap_neonsdot_k;
99
+ return;
62
100
  default: break;
63
101
  }
64
102
  #endif
@@ -98,17 +136,12 @@ void nk_dispatch_e2m3_find_(nk_capability_t v, nk_kernel_kind_t k, nk_kernel_pun
98
136
  default: break;
99
137
  }
100
138
  #endif
101
- #if NK_TARGET_SAPPHIRE
102
- if (v & nk_cap_sapphire_k) switch (k) {
103
- case nk_kernel_euclidean_k: *m = (m_t)&nk_euclidean_e2m3_sapphire, *c = nk_cap_sapphire_k; return;
104
- case nk_kernel_sqeuclidean_k: *m = (m_t)&nk_sqeuclidean_e2m3_sapphire, *c = nk_cap_sapphire_k; return;
105
- case nk_kernel_angular_k: *m = (m_t)&nk_angular_e2m3_sapphire, *c = nk_cap_sapphire_k; return;
106
- default: break;
107
- }
108
- #endif
109
139
  #if NK_TARGET_ICELAKE
110
140
  if (v & nk_cap_icelake_k) switch (k) {
111
141
  case nk_kernel_dot_k: *m = (m_t)&nk_dot_e2m3_icelake, *c = nk_cap_icelake_k; return;
142
+ case nk_kernel_euclidean_k: *m = (m_t)&nk_euclidean_e2m3_icelake, *c = nk_cap_icelake_k; return;
143
+ case nk_kernel_sqeuclidean_k: *m = (m_t)&nk_sqeuclidean_e2m3_icelake, *c = nk_cap_icelake_k; return;
144
+ case nk_kernel_angular_k: *m = (m_t)&nk_angular_e2m3_icelake, *c = nk_cap_icelake_k; return;
112
145
  case nk_kernel_reduce_moments_k: *m = (m_t)&nk_reduce_moments_e2m3_icelake, *c = nk_cap_icelake_k; return;
113
146
  default: break;
114
147
  }
package/c/dispatch_e3m2.c CHANGED
@@ -15,6 +15,9 @@ void nk_dispatch_e3m2_find_(nk_capability_t v, nk_kernel_kind_t k, nk_kernel_pun
15
15
  #if NK_TARGET_V128RELAXED
16
16
  if (v & nk_cap_v128relaxed_k) switch (k) {
17
17
  case nk_kernel_dot_k: *m = (m_t)&nk_dot_e3m2_v128relaxed, *c = nk_cap_v128relaxed_k; return;
18
+ case nk_kernel_angular_k: *m = (m_t)&nk_angular_e3m2_v128relaxed, *c = nk_cap_v128relaxed_k; return;
19
+ case nk_kernel_euclidean_k: *m = (m_t)&nk_euclidean_e3m2_v128relaxed, *c = nk_cap_v128relaxed_k; return;
20
+ case nk_kernel_sqeuclidean_k: *m = (m_t)&nk_sqeuclidean_e3m2_v128relaxed, *c = nk_cap_v128relaxed_k; return;
18
21
  case nk_kernel_reduce_moments_k:
19
22
  *m = (m_t)&nk_reduce_moments_e3m2_v128relaxed, *c = nk_cap_v128relaxed_k;
20
23
  return;
@@ -55,9 +58,44 @@ void nk_dispatch_e3m2_find_(nk_capability_t v, nk_kernel_kind_t k, nk_kernel_pun
55
58
  default: break;
56
59
  }
57
60
  #endif
61
+ #if NK_TARGET_NEONFP8
62
+ if (v & nk_cap_neonfp8_k) switch (k) {
63
+ case nk_kernel_dot_k: *m = (m_t)&nk_dot_e3m2_neonfp8, *c = nk_cap_neonfp8_k; return;
64
+ case nk_kernel_angular_k: *m = (m_t)&nk_angular_e3m2_neonfp8, *c = nk_cap_neonfp8_k; return;
65
+ case nk_kernel_euclidean_k: *m = (m_t)&nk_euclidean_e3m2_neonfp8, *c = nk_cap_neonfp8_k; return;
66
+ case nk_kernel_sqeuclidean_k: *m = (m_t)&nk_sqeuclidean_e3m2_neonfp8, *c = nk_cap_neonfp8_k; return;
67
+ case nk_kernel_dots_packed_size_k: *m = (m_t)&nk_dots_packed_size_e3m2_neonfp8, *c = nk_cap_neonfp8_k; return;
68
+ case nk_kernel_dots_pack_k: *m = (m_t)&nk_dots_pack_e3m2_neonfp8, *c = nk_cap_neonfp8_k; return;
69
+ case nk_kernel_dots_packed_k: *m = (m_t)&nk_dots_packed_e3m2_neonfp8, *c = nk_cap_neonfp8_k; return;
70
+ case nk_kernel_dots_symmetric_k: *m = (m_t)&nk_dots_symmetric_e3m2_neonfp8, *c = nk_cap_neonfp8_k; return;
71
+ case nk_kernel_angulars_packed_k: *m = (m_t)&nk_angulars_packed_e3m2_neonfp8, *c = nk_cap_neonfp8_k; return;
72
+ case nk_kernel_angulars_symmetric_k:
73
+ *m = (m_t)&nk_angulars_symmetric_e3m2_neonfp8, *c = nk_cap_neonfp8_k;
74
+ return;
75
+ case nk_kernel_euclideans_packed_k: *m = (m_t)&nk_euclideans_packed_e3m2_neonfp8, *c = nk_cap_neonfp8_k; return;
76
+ case nk_kernel_euclideans_symmetric_k:
77
+ *m = (m_t)&nk_euclideans_symmetric_e3m2_neonfp8, *c = nk_cap_neonfp8_k;
78
+ return;
79
+ default: break;
80
+ }
81
+ #endif
58
82
  #if NK_TARGET_NEONSDOT
59
83
  if (v & nk_cap_neonsdot_k) switch (k) {
60
84
  case nk_kernel_dot_k: *m = (m_t)&nk_dot_e3m2_neonsdot, *c = nk_cap_neonsdot_k; return;
85
+ case nk_kernel_dots_packed_size_k: *m = (m_t)&nk_dots_packed_size_e3m2_neonsdot, *c = nk_cap_neonsdot_k; return;
86
+ case nk_kernel_dots_pack_k: *m = (m_t)&nk_dots_pack_e3m2_neonsdot, *c = nk_cap_neonsdot_k; return;
87
+ case nk_kernel_dots_packed_k: *m = (m_t)&nk_dots_packed_e3m2_neonsdot, *c = nk_cap_neonsdot_k; return;
88
+ case nk_kernel_dots_symmetric_k: *m = (m_t)&nk_dots_symmetric_e3m2_neonsdot, *c = nk_cap_neonsdot_k; return;
89
+ case nk_kernel_angulars_packed_k: *m = (m_t)&nk_angulars_packed_e3m2_neonsdot, *c = nk_cap_neonsdot_k; return;
90
+ case nk_kernel_angulars_symmetric_k:
91
+ *m = (m_t)&nk_angulars_symmetric_e3m2_neonsdot, *c = nk_cap_neonsdot_k;
92
+ return;
93
+ case nk_kernel_euclideans_packed_k:
94
+ *m = (m_t)&nk_euclideans_packed_e3m2_neonsdot, *c = nk_cap_neonsdot_k;
95
+ return;
96
+ case nk_kernel_euclideans_symmetric_k:
97
+ *m = (m_t)&nk_euclideans_symmetric_e3m2_neonsdot, *c = nk_cap_neonsdot_k;
98
+ return;
61
99
  default: break;
62
100
  }
63
101
  #endif
@@ -97,17 +135,12 @@ void nk_dispatch_e3m2_find_(nk_capability_t v, nk_kernel_kind_t k, nk_kernel_pun
97
135
  default: break;
98
136
  }
99
137
  #endif
100
- #if NK_TARGET_SAPPHIRE
101
- if (v & nk_cap_sapphire_k) switch (k) {
102
- case nk_kernel_euclidean_k: *m = (m_t)&nk_euclidean_e3m2_sapphire, *c = nk_cap_sapphire_k; return;
103
- case nk_kernel_sqeuclidean_k: *m = (m_t)&nk_sqeuclidean_e3m2_sapphire, *c = nk_cap_sapphire_k; return;
104
- case nk_kernel_angular_k: *m = (m_t)&nk_angular_e3m2_sapphire, *c = nk_cap_sapphire_k; return;
105
- default: break;
106
- }
107
- #endif
108
138
  #if NK_TARGET_ICELAKE
109
139
  if (v & nk_cap_icelake_k) switch (k) {
110
140
  case nk_kernel_dot_k: *m = (m_t)&nk_dot_e3m2_icelake, *c = nk_cap_icelake_k; return;
141
+ case nk_kernel_euclidean_k: *m = (m_t)&nk_euclidean_e3m2_icelake, *c = nk_cap_icelake_k; return;
142
+ case nk_kernel_sqeuclidean_k: *m = (m_t)&nk_sqeuclidean_e3m2_icelake, *c = nk_cap_icelake_k; return;
143
+ case nk_kernel_angular_k: *m = (m_t)&nk_angular_e3m2_icelake, *c = nk_cap_icelake_k; return;
111
144
  case nk_kernel_reduce_moments_k: *m = (m_t)&nk_reduce_moments_e3m2_icelake, *c = nk_cap_icelake_k; return;
112
145
  default: break;
113
146
  }
@@ -135,6 +168,14 @@ void nk_dispatch_e3m2_find_(nk_capability_t v, nk_kernel_kind_t k, nk_kernel_pun
135
168
  default: break;
136
169
  }
137
170
  #endif
171
+ #if NK_TARGET_SIERRA
172
+ if (v & nk_cap_sierra_k) switch (k) {
173
+ case nk_kernel_angular_k: *m = (m_t)&nk_angular_e3m2_sierra, *c = nk_cap_sierra_k; return;
174
+ case nk_kernel_euclidean_k: *m = (m_t)&nk_euclidean_e3m2_sierra, *c = nk_cap_sierra_k; return;
175
+ case nk_kernel_sqeuclidean_k: *m = (m_t)&nk_sqeuclidean_e3m2_sierra, *c = nk_cap_sierra_k; return;
176
+ default: break;
177
+ }
178
+ #endif
138
179
  #if NK_TARGET_ALDER
139
180
  if (v & nk_cap_alder_k) switch (k) {
140
181
  case nk_kernel_angular_k: *m = (m_t)&nk_angular_e3m2_alder, *c = nk_cap_alder_k; return;
package/c/dispatch_e4m3.c CHANGED
@@ -15,6 +15,9 @@ void nk_dispatch_e4m3_find_(nk_capability_t v, nk_kernel_kind_t k, nk_kernel_pun
15
15
  return;
16
16
  case nk_kernel_reduce_minmax_k: *m = (m_t)&nk_reduce_minmax_e4m3_v128relaxed, *c = nk_cap_v128relaxed_k; return;
17
17
  case nk_kernel_dot_k: *m = (m_t)&nk_dot_e4m3_v128relaxed, *c = nk_cap_v128relaxed_k; return;
18
+ case nk_kernel_angular_k: *m = (m_t)&nk_angular_e4m3_v128relaxed, *c = nk_cap_v128relaxed_k; return;
19
+ case nk_kernel_euclidean_k: *m = (m_t)&nk_euclidean_e4m3_v128relaxed, *c = nk_cap_v128relaxed_k; return;
20
+ case nk_kernel_sqeuclidean_k: *m = (m_t)&nk_sqeuclidean_e4m3_v128relaxed, *c = nk_cap_v128relaxed_k; return;
18
21
  case nk_kernel_dots_packed_size_k:
19
22
  *m = (m_t)&nk_dots_packed_size_e4m3_v128relaxed, *c = nk_cap_v128relaxed_k;
20
23
  return;
@@ -51,11 +54,31 @@ void nk_dispatch_e4m3_find_(nk_capability_t v, nk_kernel_kind_t k, nk_kernel_pun
51
54
  default: break;
52
55
  }
53
56
  #endif
57
+ #if NK_TARGET_NEONFP8
58
+ if (v & nk_cap_neonfp8_k) switch (k) {
59
+ case nk_kernel_dot_k: *m = (m_t)&nk_dot_e4m3_neonfp8, *c = nk_cap_neonfp8_k; return;
60
+ case nk_kernel_angular_k: *m = (m_t)&nk_angular_e4m3_neonfp8, *c = nk_cap_neonfp8_k; return;
61
+ case nk_kernel_euclidean_k: *m = (m_t)&nk_euclidean_e4m3_neonfp8, *c = nk_cap_neonfp8_k; return;
62
+ case nk_kernel_sqeuclidean_k: *m = (m_t)&nk_sqeuclidean_e4m3_neonfp8, *c = nk_cap_neonfp8_k; return;
63
+ case nk_kernel_dots_packed_size_k: *m = (m_t)&nk_dots_packed_size_e4m3_neonfp8, *c = nk_cap_neonfp8_k; return;
64
+ case nk_kernel_dots_pack_k: *m = (m_t)&nk_dots_pack_e4m3_neonfp8, *c = nk_cap_neonfp8_k; return;
65
+ case nk_kernel_dots_packed_k: *m = (m_t)&nk_dots_packed_e4m3_neonfp8, *c = nk_cap_neonfp8_k; return;
66
+ case nk_kernel_dots_symmetric_k: *m = (m_t)&nk_dots_symmetric_e4m3_neonfp8, *c = nk_cap_neonfp8_k; return;
67
+ case nk_kernel_angulars_packed_k: *m = (m_t)&nk_angulars_packed_e4m3_neonfp8, *c = nk_cap_neonfp8_k; return;
68
+ case nk_kernel_angulars_symmetric_k:
69
+ *m = (m_t)&nk_angulars_symmetric_e4m3_neonfp8, *c = nk_cap_neonfp8_k;
70
+ return;
71
+ case nk_kernel_euclideans_packed_k: *m = (m_t)&nk_euclideans_packed_e4m3_neonfp8, *c = nk_cap_neonfp8_k; return;
72
+ case nk_kernel_euclideans_symmetric_k:
73
+ *m = (m_t)&nk_euclideans_symmetric_e4m3_neonfp8, *c = nk_cap_neonfp8_k;
74
+ return;
75
+ default: break;
76
+ }
77
+ #endif
54
78
  #if NK_TARGET_NEONFHM
55
79
  if (v & nk_cap_neonfhm_k) switch (k) {
56
80
  case nk_kernel_dot_k: *m = (m_t)&nk_dot_e4m3_neonfhm, *c = nk_cap_neonfhm_k; return;
57
81
  case nk_kernel_reduce_moments_k: *m = (m_t)&nk_reduce_moments_e4m3_neonfhm, *c = nk_cap_neonfhm_k; return;
58
- case nk_kernel_reduce_minmax_k: *m = (m_t)&nk_reduce_minmax_e4m3_neonfhm, *c = nk_cap_neonfhm_k; return;
59
82
  case nk_kernel_dots_packed_size_k: *m = (m_t)&nk_dots_packed_size_e4m3_neonfhm, *c = nk_cap_neonfhm_k; return;
60
83
  case nk_kernel_dots_pack_k: *m = (m_t)&nk_dots_pack_e4m3_neonfhm, *c = nk_cap_neonfhm_k; return;
61
84
  case nk_kernel_dots_packed_k: *m = (m_t)&nk_dots_packed_e4m3_neonfhm, *c = nk_cap_neonfhm_k; return;
@@ -115,19 +138,38 @@ void nk_dispatch_e4m3_find_(nk_capability_t v, nk_kernel_kind_t k, nk_kernel_pun
115
138
  default: break;
116
139
  }
117
140
  #endif
118
- #if NK_TARGET_SAPPHIRE
119
- if (v & nk_cap_sapphire_k) switch (k) {
120
- case nk_kernel_euclidean_k: *m = (m_t)&nk_euclidean_e4m3_sapphire, *c = nk_cap_sapphire_k; return;
121
- case nk_kernel_sqeuclidean_k: *m = (m_t)&nk_sqeuclidean_e4m3_sapphire, *c = nk_cap_sapphire_k; return;
141
+ #if NK_TARGET_DIAMOND
142
+ if (v & nk_cap_diamond_k) switch (k) {
143
+ case nk_kernel_dot_k: *m = (m_t)&nk_dot_e4m3_diamond, *c = nk_cap_diamond_k; return;
144
+ case nk_kernel_euclidean_k: *m = (m_t)&nk_euclidean_e4m3_diamond, *c = nk_cap_diamond_k; return;
145
+ case nk_kernel_sqeuclidean_k: *m = (m_t)&nk_sqeuclidean_e4m3_diamond, *c = nk_cap_diamond_k; return;
146
+ case nk_kernel_angular_k: *m = (m_t)&nk_angular_e4m3_diamond, *c = nk_cap_diamond_k; return;
147
+ case nk_kernel_dots_packed_size_k: *m = (m_t)&nk_dots_packed_size_e4m3_diamond, *c = nk_cap_diamond_k; return;
148
+ case nk_kernel_dots_pack_k: *m = (m_t)&nk_dots_pack_e4m3_diamond, *c = nk_cap_diamond_k; return;
149
+ case nk_kernel_dots_packed_k: *m = (m_t)&nk_dots_packed_e4m3_diamond, *c = nk_cap_diamond_k; return;
150
+ case nk_kernel_dots_symmetric_k: *m = (m_t)&nk_dots_symmetric_e4m3_diamond, *c = nk_cap_diamond_k; return;
151
+ case nk_kernel_angulars_packed_k: *m = (m_t)&nk_angulars_packed_e4m3_diamond, *c = nk_cap_diamond_k; return;
152
+ case nk_kernel_angulars_symmetric_k:
153
+ *m = (m_t)&nk_angulars_symmetric_e4m3_diamond, *c = nk_cap_diamond_k;
154
+ return;
155
+ case nk_kernel_euclideans_packed_k: *m = (m_t)&nk_euclideans_packed_e4m3_diamond, *c = nk_cap_diamond_k; return;
156
+ case nk_kernel_euclideans_symmetric_k:
157
+ *m = (m_t)&nk_euclideans_symmetric_e4m3_diamond, *c = nk_cap_diamond_k;
158
+ return;
159
+ default: break;
160
+ }
161
+ #endif
162
+ #if NK_TARGET_ICELAKE
163
+ if (v & nk_cap_icelake_k) switch (k) {
164
+ case nk_kernel_dot_k: *m = (m_t)&nk_dot_e4m3_icelake, *c = nk_cap_icelake_k; return;
165
+ case nk_kernel_euclidean_k: *m = (m_t)&nk_euclidean_e4m3_icelake, *c = nk_cap_icelake_k; return;
166
+ case nk_kernel_sqeuclidean_k: *m = (m_t)&nk_sqeuclidean_e4m3_icelake, *c = nk_cap_icelake_k; return;
167
+ case nk_kernel_angular_k: *m = (m_t)&nk_angular_e4m3_icelake, *c = nk_cap_icelake_k; return;
122
168
  default: break;
123
169
  }
124
170
  #endif
125
171
  #if NK_TARGET_GENOA
126
172
  if (v & nk_cap_genoa_k) switch (k) {
127
- case nk_kernel_dot_k: *m = (m_t)&nk_dot_e4m3_genoa, *c = nk_cap_genoa_k; return;
128
- case nk_kernel_euclidean_k: *m = (m_t)&nk_euclidean_e4m3_genoa, *c = nk_cap_genoa_k; return;
129
- case nk_kernel_sqeuclidean_k: *m = (m_t)&nk_sqeuclidean_e4m3_genoa, *c = nk_cap_genoa_k; return;
130
- case nk_kernel_angular_k: *m = (m_t)&nk_angular_e4m3_genoa, *c = nk_cap_genoa_k; return;
131
173
  case nk_kernel_dots_packed_size_k: *m = (m_t)&nk_dots_packed_size_e4m3_genoa, *c = nk_cap_genoa_k; return;
132
174
  case nk_kernel_dots_pack_k: *m = (m_t)&nk_dots_pack_e4m3_genoa, *c = nk_cap_genoa_k; return;
133
175
  case nk_kernel_dots_packed_k: *m = (m_t)&nk_dots_packed_e4m3_genoa, *c = nk_cap_genoa_k; return;
package/c/dispatch_e5m2.c CHANGED
@@ -15,6 +15,9 @@ void nk_dispatch_e5m2_find_(nk_capability_t v, nk_kernel_kind_t k, nk_kernel_pun
15
15
  return;
16
16
  case nk_kernel_reduce_minmax_k: *m = (m_t)&nk_reduce_minmax_e5m2_v128relaxed, *c = nk_cap_v128relaxed_k; return;
17
17
  case nk_kernel_dot_k: *m = (m_t)&nk_dot_e5m2_v128relaxed, *c = nk_cap_v128relaxed_k; return;
18
+ case nk_kernel_angular_k: *m = (m_t)&nk_angular_e5m2_v128relaxed, *c = nk_cap_v128relaxed_k; return;
19
+ case nk_kernel_euclidean_k: *m = (m_t)&nk_euclidean_e5m2_v128relaxed, *c = nk_cap_v128relaxed_k; return;
20
+ case nk_kernel_sqeuclidean_k: *m = (m_t)&nk_sqeuclidean_e5m2_v128relaxed, *c = nk_cap_v128relaxed_k; return;
18
21
  case nk_kernel_dots_packed_size_k:
19
22
  *m = (m_t)&nk_dots_packed_size_e5m2_v128relaxed, *c = nk_cap_v128relaxed_k;
20
23
  return;
@@ -51,11 +54,31 @@ void nk_dispatch_e5m2_find_(nk_capability_t v, nk_kernel_kind_t k, nk_kernel_pun
51
54
  default: break;
52
55
  }
53
56
  #endif
57
+ #if NK_TARGET_NEONFP8
58
+ if (v & nk_cap_neonfp8_k) switch (k) {
59
+ case nk_kernel_dot_k: *m = (m_t)&nk_dot_e5m2_neonfp8, *c = nk_cap_neonfp8_k; return;
60
+ case nk_kernel_angular_k: *m = (m_t)&nk_angular_e5m2_neonfp8, *c = nk_cap_neonfp8_k; return;
61
+ case nk_kernel_euclidean_k: *m = (m_t)&nk_euclidean_e5m2_neonfp8, *c = nk_cap_neonfp8_k; return;
62
+ case nk_kernel_sqeuclidean_k: *m = (m_t)&nk_sqeuclidean_e5m2_neonfp8, *c = nk_cap_neonfp8_k; return;
63
+ case nk_kernel_dots_packed_size_k: *m = (m_t)&nk_dots_packed_size_e5m2_neonfp8, *c = nk_cap_neonfp8_k; return;
64
+ case nk_kernel_dots_pack_k: *m = (m_t)&nk_dots_pack_e5m2_neonfp8, *c = nk_cap_neonfp8_k; return;
65
+ case nk_kernel_dots_packed_k: *m = (m_t)&nk_dots_packed_e5m2_neonfp8, *c = nk_cap_neonfp8_k; return;
66
+ case nk_kernel_dots_symmetric_k: *m = (m_t)&nk_dots_symmetric_e5m2_neonfp8, *c = nk_cap_neonfp8_k; return;
67
+ case nk_kernel_angulars_packed_k: *m = (m_t)&nk_angulars_packed_e5m2_neonfp8, *c = nk_cap_neonfp8_k; return;
68
+ case nk_kernel_angulars_symmetric_k:
69
+ *m = (m_t)&nk_angulars_symmetric_e5m2_neonfp8, *c = nk_cap_neonfp8_k;
70
+ return;
71
+ case nk_kernel_euclideans_packed_k: *m = (m_t)&nk_euclideans_packed_e5m2_neonfp8, *c = nk_cap_neonfp8_k; return;
72
+ case nk_kernel_euclideans_symmetric_k:
73
+ *m = (m_t)&nk_euclideans_symmetric_e5m2_neonfp8, *c = nk_cap_neonfp8_k;
74
+ return;
75
+ default: break;
76
+ }
77
+ #endif
54
78
  #if NK_TARGET_NEONFHM
55
79
  if (v & nk_cap_neonfhm_k) switch (k) {
56
80
  case nk_kernel_dot_k: *m = (m_t)&nk_dot_e5m2_neonfhm, *c = nk_cap_neonfhm_k; return;
57
81
  case nk_kernel_reduce_moments_k: *m = (m_t)&nk_reduce_moments_e5m2_neonfhm, *c = nk_cap_neonfhm_k; return;
58
- case nk_kernel_reduce_minmax_k: *m = (m_t)&nk_reduce_minmax_e5m2_neonfhm, *c = nk_cap_neonfhm_k; return;
59
82
  case nk_kernel_dots_packed_size_k: *m = (m_t)&nk_dots_packed_size_e5m2_neonfhm, *c = nk_cap_neonfhm_k; return;
60
83
  case nk_kernel_dots_pack_k: *m = (m_t)&nk_dots_pack_e5m2_neonfhm, *c = nk_cap_neonfhm_k; return;
61
84
  case nk_kernel_dots_packed_k: *m = (m_t)&nk_dots_packed_e5m2_neonfhm, *c = nk_cap_neonfhm_k; return;
@@ -115,6 +138,27 @@ void nk_dispatch_e5m2_find_(nk_capability_t v, nk_kernel_kind_t k, nk_kernel_pun
115
138
  default: break;
116
139
  }
117
140
  #endif
141
+ #if NK_TARGET_DIAMOND
142
+ if (v & nk_cap_diamond_k) switch (k) {
143
+ case nk_kernel_dot_k: *m = (m_t)&nk_dot_e5m2_diamond, *c = nk_cap_diamond_k; return;
144
+ case nk_kernel_euclidean_k: *m = (m_t)&nk_euclidean_e5m2_diamond, *c = nk_cap_diamond_k; return;
145
+ case nk_kernel_sqeuclidean_k: *m = (m_t)&nk_sqeuclidean_e5m2_diamond, *c = nk_cap_diamond_k; return;
146
+ case nk_kernel_angular_k: *m = (m_t)&nk_angular_e5m2_diamond, *c = nk_cap_diamond_k; return;
147
+ case nk_kernel_dots_packed_size_k: *m = (m_t)&nk_dots_packed_size_e5m2_diamond, *c = nk_cap_diamond_k; return;
148
+ case nk_kernel_dots_pack_k: *m = (m_t)&nk_dots_pack_e5m2_diamond, *c = nk_cap_diamond_k; return;
149
+ case nk_kernel_dots_packed_k: *m = (m_t)&nk_dots_packed_e5m2_diamond, *c = nk_cap_diamond_k; return;
150
+ case nk_kernel_dots_symmetric_k: *m = (m_t)&nk_dots_symmetric_e5m2_diamond, *c = nk_cap_diamond_k; return;
151
+ case nk_kernel_angulars_packed_k: *m = (m_t)&nk_angulars_packed_e5m2_diamond, *c = nk_cap_diamond_k; return;
152
+ case nk_kernel_angulars_symmetric_k:
153
+ *m = (m_t)&nk_angulars_symmetric_e5m2_diamond, *c = nk_cap_diamond_k;
154
+ return;
155
+ case nk_kernel_euclideans_packed_k: *m = (m_t)&nk_euclideans_packed_e5m2_diamond, *c = nk_cap_diamond_k; return;
156
+ case nk_kernel_euclideans_symmetric_k:
157
+ *m = (m_t)&nk_euclideans_symmetric_e5m2_diamond, *c = nk_cap_diamond_k;
158
+ return;
159
+ default: break;
160
+ }
161
+ #endif
118
162
  #if NK_TARGET_GENOA
119
163
  if (v & nk_cap_genoa_k) switch (k) {
120
164
  case nk_kernel_dot_k: *m = (m_t)&nk_dot_e5m2_genoa, *c = nk_cap_genoa_k; return;
package/c/dispatch_f16.c CHANGED
@@ -43,6 +43,10 @@ void nk_dispatch_f16_find_(nk_capability_t v, nk_kernel_kind_t k, nk_kernel_punn
43
43
  case nk_kernel_euclideans_symmetric_k:
44
44
  *m = (m_t)&nk_euclideans_symmetric_f16_v128relaxed, *c = nk_cap_v128relaxed_k;
45
45
  return;
46
+ case nk_kernel_each_sum_k: *m = (m_t)&nk_each_sum_f16_v128relaxed, *c = nk_cap_v128relaxed_k; return;
47
+ case nk_kernel_each_scale_k: *m = (m_t)&nk_each_scale_f16_v128relaxed, *c = nk_cap_v128relaxed_k; return;
48
+ case nk_kernel_each_blend_k: *m = (m_t)&nk_each_blend_f16_v128relaxed, *c = nk_cap_v128relaxed_k; return;
49
+ case nk_kernel_each_fma_k: *m = (m_t)&nk_each_fma_f16_v128relaxed, *c = nk_cap_v128relaxed_k; return;
46
50
  default: break;
47
51
  }
48
52
  #endif
@@ -91,42 +95,27 @@ void nk_dispatch_f16_find_(nk_capability_t v, nk_kernel_kind_t k, nk_kernel_punn
91
95
  #endif
92
96
  #if NK_TARGET_NEONHALF
93
97
  if (v & nk_cap_neonhalf_k) switch (k) {
94
- case nk_kernel_dot_k: *m = (m_t)&nk_dot_f16_neonhalf, *c = nk_cap_neonhalf_k; return;
95
- case nk_kernel_angular_k: *m = (m_t)&nk_angular_f16_neonhalf, *c = nk_cap_neonhalf_k; return;
96
- case nk_kernel_sqeuclidean_k: *m = (m_t)&nk_sqeuclidean_f16_neonhalf, *c = nk_cap_neonhalf_k; return;
97
- case nk_kernel_euclidean_k: *m = (m_t)&nk_euclidean_f16_neonhalf, *c = nk_cap_neonhalf_k; return;
98
- case nk_kernel_jsd_k: *m = (m_t)&nk_jsd_f16_neonhalf, *c = nk_cap_neonhalf_k; return;
99
- case nk_kernel_kld_k: *m = (m_t)&nk_kld_f16_neonhalf, *c = nk_cap_neonhalf_k; return;
100
- case nk_kernel_bilinear_k: *m = (m_t)&nk_bilinear_f16_neonhalf, *c = nk_cap_neonhalf_k; return;
101
- case nk_kernel_mahalanobis_k: *m = (m_t)&nk_mahalanobis_f16_neonhalf, *c = nk_cap_neonhalf_k; return;
102
98
  case nk_kernel_each_fma_k: *m = (m_t)&nk_each_fma_f16_neonhalf, *c = nk_cap_neonhalf_k; return;
103
99
  case nk_kernel_each_blend_k: *m = (m_t)&nk_each_blend_f16_neonhalf, *c = nk_cap_neonhalf_k; return;
104
100
  case nk_kernel_each_scale_k: *m = (m_t)&nk_each_scale_f16_neonhalf, *c = nk_cap_neonhalf_k; return;
105
101
  case nk_kernel_each_sum_k: *m = (m_t)&nk_each_sum_f16_neonhalf, *c = nk_cap_neonhalf_k; return;
106
- case nk_kernel_kabsch_k: *m = (m_t)&nk_kabsch_f16_neonhalf, *c = nk_cap_neonhalf_k; return;
107
- case nk_kernel_rmsd_k: *m = (m_t)&nk_rmsd_f16_neonhalf, *c = nk_cap_neonhalf_k; return;
108
- case nk_kernel_umeyama_k: *m = (m_t)&nk_umeyama_f16_neonhalf, *c = nk_cap_neonhalf_k; return;
109
- case nk_kernel_reduce_moments_k: *m = (m_t)&nk_reduce_moments_f16_neonhalf, *c = nk_cap_neonhalf_k; return;
110
- case nk_kernel_dots_packed_size_k: *m = (m_t)&nk_dots_packed_size_f16_neonhalf, *c = nk_cap_neonhalf_k; return;
111
- case nk_kernel_dots_pack_k: *m = (m_t)&nk_dots_pack_f16_neonhalf, *c = nk_cap_neonhalf_k; return;
112
- case nk_kernel_dots_packed_k: *m = (m_t)&nk_dots_packed_f16_neonhalf, *c = nk_cap_neonhalf_k; return;
113
- case nk_kernel_dots_symmetric_k: *m = (m_t)&nk_dots_symmetric_f16_neonhalf, *c = nk_cap_neonhalf_k; return;
114
- case nk_kernel_angulars_packed_k: *m = (m_t)&nk_angulars_packed_f16_neonhalf, *c = nk_cap_neonhalf_k; return;
115
- case nk_kernel_angulars_symmetric_k:
116
- *m = (m_t)&nk_angulars_symmetric_f16_neonhalf, *c = nk_cap_neonhalf_k;
117
- return;
118
- case nk_kernel_euclideans_packed_k:
119
- *m = (m_t)&nk_euclideans_packed_f16_neonhalf, *c = nk_cap_neonhalf_k;
120
- return;
121
- case nk_kernel_euclideans_symmetric_k:
122
- *m = (m_t)&nk_euclideans_symmetric_f16_neonhalf, *c = nk_cap_neonhalf_k;
123
- return;
124
102
  default: break;
125
103
  }
126
104
  #endif
127
105
  #if NK_TARGET_NEON
128
106
  if (v & nk_cap_neon_k) switch (k) {
129
107
  case nk_kernel_dot_k: *m = (m_t)&nk_dot_f16_neon, *c = nk_cap_neon_k; return;
108
+ case nk_kernel_angular_k: *m = (m_t)&nk_angular_f16_neon, *c = nk_cap_neon_k; return;
109
+ case nk_kernel_sqeuclidean_k: *m = (m_t)&nk_sqeuclidean_f16_neon, *c = nk_cap_neon_k; return;
110
+ case nk_kernel_euclidean_k: *m = (m_t)&nk_euclidean_f16_neon, *c = nk_cap_neon_k; return;
111
+ case nk_kernel_jsd_k: *m = (m_t)&nk_jsd_f16_neon, *c = nk_cap_neon_k; return;
112
+ case nk_kernel_kld_k: *m = (m_t)&nk_kld_f16_neon, *c = nk_cap_neon_k; return;
113
+ case nk_kernel_bilinear_k: *m = (m_t)&nk_bilinear_f16_neon, *c = nk_cap_neon_k; return;
114
+ case nk_kernel_mahalanobis_k: *m = (m_t)&nk_mahalanobis_f16_neon, *c = nk_cap_neon_k; return;
115
+ case nk_kernel_kabsch_k: *m = (m_t)&nk_kabsch_f16_neon, *c = nk_cap_neon_k; return;
116
+ case nk_kernel_rmsd_k: *m = (m_t)&nk_rmsd_f16_neon, *c = nk_cap_neon_k; return;
117
+ case nk_kernel_umeyama_k: *m = (m_t)&nk_umeyama_f16_neon, *c = nk_cap_neon_k; return;
118
+ case nk_kernel_reduce_moments_k: *m = (m_t)&nk_reduce_moments_f16_neon, *c = nk_cap_neon_k; return;
130
119
  case nk_kernel_dots_packed_size_k: *m = (m_t)&nk_dots_packed_size_f16_neon, *c = nk_cap_neon_k; return;
131
120
  case nk_kernel_dots_pack_k: *m = (m_t)&nk_dots_pack_f16_neon, *c = nk_cap_neon_k; return;
132
121
  case nk_kernel_dots_packed_k: *m = (m_t)&nk_dots_packed_f16_neon, *c = nk_cap_neon_k; return;
@@ -174,6 +163,15 @@ void nk_dispatch_f16_find_(nk_capability_t v, nk_kernel_kind_t k, nk_kernel_punn
174
163
  default: break;
175
164
  }
176
165
  #endif
166
+ #if NK_TARGET_DIAMOND
167
+ if (v & nk_cap_diamond_k) switch (k) {
168
+ case nk_kernel_dot_k: *m = (m_t)&nk_dot_f16_diamond, *c = nk_cap_diamond_k; return;
169
+ case nk_kernel_euclidean_k: *m = (m_t)&nk_euclidean_f16_diamond, *c = nk_cap_diamond_k; return;
170
+ case nk_kernel_sqeuclidean_k: *m = (m_t)&nk_sqeuclidean_f16_diamond, *c = nk_cap_diamond_k; return;
171
+ case nk_kernel_angular_k: *m = (m_t)&nk_angular_f16_diamond, *c = nk_cap_diamond_k; return;
172
+ default: break;
173
+ }
174
+ #endif
177
175
  #if NK_TARGET_SKYLAKE
178
176
  if (v & nk_cap_skylake_k) switch (k) {
179
177
  case nk_kernel_dot_k: *m = (m_t)&nk_dot_f16_skylake, *c = nk_cap_skylake_k; return;
@@ -202,6 +200,9 @@ void nk_dispatch_f16_find_(nk_capability_t v, nk_kernel_kind_t k, nk_kernel_punn
202
200
  case nk_kernel_each_blend_k: *m = (m_t)&nk_each_blend_f16_skylake, *c = nk_cap_skylake_k; return;
203
201
  case nk_kernel_each_fma_k: *m = (m_t)&nk_each_fma_f16_skylake, *c = nk_cap_skylake_k; return;
204
202
  case nk_kernel_each_scale_k: *m = (m_t)&nk_each_scale_f16_skylake, *c = nk_cap_skylake_k; return;
203
+ case nk_kernel_rmsd_k: *m = (m_t)&nk_rmsd_f16_skylake, *c = nk_cap_skylake_k; return;
204
+ case nk_kernel_kabsch_k: *m = (m_t)&nk_kabsch_f16_skylake, *c = nk_cap_skylake_k; return;
205
+ case nk_kernel_umeyama_k: *m = (m_t)&nk_umeyama_f16_skylake, *c = nk_cap_skylake_k; return;
205
206
  default: break;
206
207
  }
207
208
  #endif
@@ -252,6 +253,58 @@ void nk_dispatch_f16_find_(nk_capability_t v, nk_kernel_kind_t k, nk_kernel_punn
252
253
  default: break;
253
254
  }
254
255
  #endif
256
+ #if NK_TARGET_POWERVSX
257
+ if (v & nk_cap_powervsx_k) switch (k) {
258
+ case nk_kernel_dot_k: *m = (m_t)&nk_dot_f16_powervsx, *c = nk_cap_powervsx_k; return;
259
+ case nk_kernel_angular_k: *m = (m_t)&nk_angular_f16_powervsx, *c = nk_cap_powervsx_k; return;
260
+ case nk_kernel_sqeuclidean_k: *m = (m_t)&nk_sqeuclidean_f16_powervsx, *c = nk_cap_powervsx_k; return;
261
+ case nk_kernel_euclidean_k: *m = (m_t)&nk_euclidean_f16_powervsx, *c = nk_cap_powervsx_k; return;
262
+ case nk_kernel_dots_packed_size_k: *m = (m_t)&nk_dots_packed_size_f16_powervsx, *c = nk_cap_powervsx_k; return;
263
+ case nk_kernel_dots_pack_k: *m = (m_t)&nk_dots_pack_f16_powervsx, *c = nk_cap_powervsx_k; return;
264
+ case nk_kernel_dots_packed_k: *m = (m_t)&nk_dots_packed_f16_powervsx, *c = nk_cap_powervsx_k; return;
265
+ case nk_kernel_dots_symmetric_k: *m = (m_t)&nk_dots_symmetric_f16_powervsx, *c = nk_cap_powervsx_k; return;
266
+ case nk_kernel_angulars_packed_k: *m = (m_t)&nk_angulars_packed_f16_powervsx, *c = nk_cap_powervsx_k; return;
267
+ case nk_kernel_angulars_symmetric_k:
268
+ *m = (m_t)&nk_angulars_symmetric_f16_powervsx, *c = nk_cap_powervsx_k;
269
+ return;
270
+ case nk_kernel_euclideans_packed_k:
271
+ *m = (m_t)&nk_euclideans_packed_f16_powervsx, *c = nk_cap_powervsx_k;
272
+ return;
273
+ case nk_kernel_euclideans_symmetric_k:
274
+ *m = (m_t)&nk_euclideans_symmetric_f16_powervsx, *c = nk_cap_powervsx_k;
275
+ return;
276
+ default: break;
277
+ }
278
+ #endif
279
+ #if NK_TARGET_LOONGSONASX
280
+ if (v & nk_cap_loongsonasx_k) switch (k) {
281
+ case nk_kernel_dot_k: *m = (m_t)&nk_dot_f16_loongsonasx, *c = nk_cap_loongsonasx_k; return;
282
+ case nk_kernel_angular_k: *m = (m_t)&nk_angular_f16_loongsonasx, *c = nk_cap_loongsonasx_k; return;
283
+ case nk_kernel_sqeuclidean_k: *m = (m_t)&nk_sqeuclidean_f16_loongsonasx, *c = nk_cap_loongsonasx_k; return;
284
+ case nk_kernel_euclidean_k: *m = (m_t)&nk_euclidean_f16_loongsonasx, *c = nk_cap_loongsonasx_k; return;
285
+ case nk_kernel_dots_packed_size_k:
286
+ *m = (m_t)&nk_dots_packed_size_f16_loongsonasx, *c = nk_cap_loongsonasx_k;
287
+ return;
288
+ case nk_kernel_dots_pack_k: *m = (m_t)&nk_dots_pack_f16_loongsonasx, *c = nk_cap_loongsonasx_k; return;
289
+ case nk_kernel_dots_packed_k: *m = (m_t)&nk_dots_packed_f16_loongsonasx, *c = nk_cap_loongsonasx_k; return;
290
+ case nk_kernel_dots_symmetric_k:
291
+ *m = (m_t)&nk_dots_symmetric_f16_loongsonasx, *c = nk_cap_loongsonasx_k;
292
+ return;
293
+ case nk_kernel_angulars_packed_k:
294
+ *m = (m_t)&nk_angulars_packed_f16_loongsonasx, *c = nk_cap_loongsonasx_k;
295
+ return;
296
+ case nk_kernel_angulars_symmetric_k:
297
+ *m = (m_t)&nk_angulars_symmetric_f16_loongsonasx, *c = nk_cap_loongsonasx_k;
298
+ return;
299
+ case nk_kernel_euclideans_packed_k:
300
+ *m = (m_t)&nk_euclideans_packed_f16_loongsonasx, *c = nk_cap_loongsonasx_k;
301
+ return;
302
+ case nk_kernel_euclideans_symmetric_k:
303
+ *m = (m_t)&nk_euclideans_symmetric_f16_loongsonasx, *c = nk_cap_loongsonasx_k;
304
+ return;
305
+ default: break;
306
+ }
307
+ #endif
255
308
  #if NK_TARGET_RVVHALF
256
309
  if (v & nk_cap_rvvhalf_k) switch (k) {
257
310
  case nk_kernel_dot_k: *m = (m_t)&nk_dot_f16_rvvhalf, *c = nk_cap_rvvhalf_k; return;
package/c/dispatch_f16c.c CHANGED
@@ -22,11 +22,11 @@ void nk_dispatch_f16c_find_(nk_capability_t v, nk_kernel_kind_t k, nk_kernel_pun
22
22
  default: break;
23
23
  }
24
24
  #endif
25
- #if NK_TARGET_NEONHALF
26
- if (v & nk_cap_neonhalf_k) switch (k) {
27
- case nk_kernel_dot_k: *m = (m_t)&nk_dot_f16c_neonhalf, *c = nk_cap_neonhalf_k; return;
28
- case nk_kernel_vdot_k: *m = (m_t)&nk_vdot_f16c_neonhalf, *c = nk_cap_neonhalf_k; return;
29
- case nk_kernel_bilinear_k: *m = (m_t)&nk_bilinear_f16c_neonhalf, *c = nk_cap_neonbfdot_k; return;
25
+ #if NK_TARGET_NEON
26
+ if (v & nk_cap_neon_k) switch (k) {
27
+ case nk_kernel_dot_k: *m = (m_t)&nk_dot_f16c_neon, *c = nk_cap_neon_k; return;
28
+ case nk_kernel_vdot_k: *m = (m_t)&nk_vdot_f16c_neon, *c = nk_cap_neon_k; return;
29
+ case nk_kernel_bilinear_k: *m = (m_t)&nk_bilinear_f16c_neon, *c = nk_cap_neon_k; return;
30
30
  default: break;
31
31
  }
32
32
  #endif
package/c/dispatch_f32.c CHANGED
@@ -51,6 +51,10 @@ void nk_dispatch_f32_find_(nk_capability_t v, nk_kernel_kind_t k, nk_kernel_punn
51
51
  case nk_kernel_rmsd_k: *m = (m_t)&nk_rmsd_f32_v128relaxed, *c = nk_cap_v128relaxed_k; return;
52
52
  case nk_kernel_kabsch_k: *m = (m_t)&nk_kabsch_f32_v128relaxed, *c = nk_cap_v128relaxed_k; return;
53
53
  case nk_kernel_umeyama_k: *m = (m_t)&nk_umeyama_f32_v128relaxed, *c = nk_cap_v128relaxed_k; return;
54
+ case nk_kernel_each_sum_k: *m = (m_t)&nk_each_sum_f32_v128relaxed, *c = nk_cap_v128relaxed_k; return;
55
+ case nk_kernel_each_scale_k: *m = (m_t)&nk_each_scale_f32_v128relaxed, *c = nk_cap_v128relaxed_k; return;
56
+ case nk_kernel_each_blend_k: *m = (m_t)&nk_each_blend_f32_v128relaxed, *c = nk_cap_v128relaxed_k; return;
57
+ case nk_kernel_each_fma_k: *m = (m_t)&nk_each_fma_f32_v128relaxed, *c = nk_cap_v128relaxed_k; return;
54
58
  default: break;
55
59
  }
56
60
  #endif
@@ -255,6 +259,58 @@ void nk_dispatch_f32_find_(nk_capability_t v, nk_kernel_kind_t k, nk_kernel_punn
255
259
  default: break;
256
260
  }
257
261
  #endif
262
+ #if NK_TARGET_POWERVSX
263
+ if (v & nk_cap_powervsx_k) switch (k) {
264
+ case nk_kernel_dot_k: *m = (m_t)&nk_dot_f32_powervsx, *c = nk_cap_powervsx_k; return;
265
+ case nk_kernel_angular_k: *m = (m_t)&nk_angular_f32_powervsx, *c = nk_cap_powervsx_k; return;
266
+ case nk_kernel_sqeuclidean_k: *m = (m_t)&nk_sqeuclidean_f32_powervsx, *c = nk_cap_powervsx_k; return;
267
+ case nk_kernel_euclidean_k: *m = (m_t)&nk_euclidean_f32_powervsx, *c = nk_cap_powervsx_k; return;
268
+ case nk_kernel_dots_packed_size_k: *m = (m_t)&nk_dots_packed_size_f32_powervsx, *c = nk_cap_powervsx_k; return;
269
+ case nk_kernel_dots_pack_k: *m = (m_t)&nk_dots_pack_f32_powervsx, *c = nk_cap_powervsx_k; return;
270
+ case nk_kernel_dots_packed_k: *m = (m_t)&nk_dots_packed_f32_powervsx, *c = nk_cap_powervsx_k; return;
271
+ case nk_kernel_dots_symmetric_k: *m = (m_t)&nk_dots_symmetric_f32_powervsx, *c = nk_cap_powervsx_k; return;
272
+ case nk_kernel_angulars_packed_k: *m = (m_t)&nk_angulars_packed_f32_powervsx, *c = nk_cap_powervsx_k; return;
273
+ case nk_kernel_angulars_symmetric_k:
274
+ *m = (m_t)&nk_angulars_symmetric_f32_powervsx, *c = nk_cap_powervsx_k;
275
+ return;
276
+ case nk_kernel_euclideans_packed_k:
277
+ *m = (m_t)&nk_euclideans_packed_f32_powervsx, *c = nk_cap_powervsx_k;
278
+ return;
279
+ case nk_kernel_euclideans_symmetric_k:
280
+ *m = (m_t)&nk_euclideans_symmetric_f32_powervsx, *c = nk_cap_powervsx_k;
281
+ return;
282
+ default: break;
283
+ }
284
+ #endif
285
+ #if NK_TARGET_LOONGSONASX
286
+ if (v & nk_cap_loongsonasx_k) switch (k) {
287
+ case nk_kernel_dot_k: *m = (m_t)&nk_dot_f32_loongsonasx, *c = nk_cap_loongsonasx_k; return;
288
+ case nk_kernel_angular_k: *m = (m_t)&nk_angular_f32_loongsonasx, *c = nk_cap_loongsonasx_k; return;
289
+ case nk_kernel_sqeuclidean_k: *m = (m_t)&nk_sqeuclidean_f32_loongsonasx, *c = nk_cap_loongsonasx_k; return;
290
+ case nk_kernel_euclidean_k: *m = (m_t)&nk_euclidean_f32_loongsonasx, *c = nk_cap_loongsonasx_k; return;
291
+ case nk_kernel_dots_packed_size_k:
292
+ *m = (m_t)&nk_dots_packed_size_f32_loongsonasx, *c = nk_cap_loongsonasx_k;
293
+ return;
294
+ case nk_kernel_dots_pack_k: *m = (m_t)&nk_dots_pack_f32_loongsonasx, *c = nk_cap_loongsonasx_k; return;
295
+ case nk_kernel_dots_packed_k: *m = (m_t)&nk_dots_packed_f32_loongsonasx, *c = nk_cap_loongsonasx_k; return;
296
+ case nk_kernel_dots_symmetric_k:
297
+ *m = (m_t)&nk_dots_symmetric_f32_loongsonasx, *c = nk_cap_loongsonasx_k;
298
+ return;
299
+ case nk_kernel_angulars_packed_k:
300
+ *m = (m_t)&nk_angulars_packed_f32_loongsonasx, *c = nk_cap_loongsonasx_k;
301
+ return;
302
+ case nk_kernel_angulars_symmetric_k:
303
+ *m = (m_t)&nk_angulars_symmetric_f32_loongsonasx, *c = nk_cap_loongsonasx_k;
304
+ return;
305
+ case nk_kernel_euclideans_packed_k:
306
+ *m = (m_t)&nk_euclideans_packed_f32_loongsonasx, *c = nk_cap_loongsonasx_k;
307
+ return;
308
+ case nk_kernel_euclideans_symmetric_k:
309
+ *m = (m_t)&nk_euclideans_symmetric_f32_loongsonasx, *c = nk_cap_loongsonasx_k;
310
+ return;
311
+ default: break;
312
+ }
313
+ #endif
258
314
  #if NK_TARGET_RVV
259
315
  if (v & nk_cap_rvv_k) switch (k) {
260
316
  case nk_kernel_dot_k: *m = (m_t)&nk_dot_f32_rvv, *c = nk_cap_rvv_k; return;
package/c/dispatch_f64.c CHANGED
@@ -182,6 +182,58 @@ void nk_dispatch_f64_find_(nk_capability_t v, nk_kernel_kind_t k, nk_kernel_punn
182
182
  default: break;
183
183
  }
184
184
  #endif
185
+ #if NK_TARGET_POWERVSX
186
+ if (v & nk_cap_powervsx_k) switch (k) {
187
+ case nk_kernel_dot_k: *m = (m_t)&nk_dot_f64_powervsx, *c = nk_cap_powervsx_k; return;
188
+ case nk_kernel_angular_k: *m = (m_t)&nk_angular_f64_powervsx, *c = nk_cap_powervsx_k; return;
189
+ case nk_kernel_sqeuclidean_k: *m = (m_t)&nk_sqeuclidean_f64_powervsx, *c = nk_cap_powervsx_k; return;
190
+ case nk_kernel_euclidean_k: *m = (m_t)&nk_euclidean_f64_powervsx, *c = nk_cap_powervsx_k; return;
191
+ case nk_kernel_dots_packed_size_k: *m = (m_t)&nk_dots_packed_size_f64_powervsx, *c = nk_cap_powervsx_k; return;
192
+ case nk_kernel_dots_pack_k: *m = (m_t)&nk_dots_pack_f64_powervsx, *c = nk_cap_powervsx_k; return;
193
+ case nk_kernel_dots_packed_k: *m = (m_t)&nk_dots_packed_f64_powervsx, *c = nk_cap_powervsx_k; return;
194
+ case nk_kernel_dots_symmetric_k: *m = (m_t)&nk_dots_symmetric_f64_powervsx, *c = nk_cap_powervsx_k; return;
195
+ case nk_kernel_angulars_packed_k: *m = (m_t)&nk_angulars_packed_f64_powervsx, *c = nk_cap_powervsx_k; return;
196
+ case nk_kernel_angulars_symmetric_k:
197
+ *m = (m_t)&nk_angulars_symmetric_f64_powervsx, *c = nk_cap_powervsx_k;
198
+ return;
199
+ case nk_kernel_euclideans_packed_k:
200
+ *m = (m_t)&nk_euclideans_packed_f64_powervsx, *c = nk_cap_powervsx_k;
201
+ return;
202
+ case nk_kernel_euclideans_symmetric_k:
203
+ *m = (m_t)&nk_euclideans_symmetric_f64_powervsx, *c = nk_cap_powervsx_k;
204
+ return;
205
+ default: break;
206
+ }
207
+ #endif
208
+ #if NK_TARGET_LOONGSONASX
209
+ if (v & nk_cap_loongsonasx_k) switch (k) {
210
+ case nk_kernel_dot_k: *m = (m_t)&nk_dot_f64_loongsonasx, *c = nk_cap_loongsonasx_k; return;
211
+ case nk_kernel_angular_k: *m = (m_t)&nk_angular_f64_loongsonasx, *c = nk_cap_loongsonasx_k; return;
212
+ case nk_kernel_sqeuclidean_k: *m = (m_t)&nk_sqeuclidean_f64_loongsonasx, *c = nk_cap_loongsonasx_k; return;
213
+ case nk_kernel_euclidean_k: *m = (m_t)&nk_euclidean_f64_loongsonasx, *c = nk_cap_loongsonasx_k; return;
214
+ case nk_kernel_dots_packed_size_k:
215
+ *m = (m_t)&nk_dots_packed_size_f64_loongsonasx, *c = nk_cap_loongsonasx_k;
216
+ return;
217
+ case nk_kernel_dots_pack_k: *m = (m_t)&nk_dots_pack_f64_loongsonasx, *c = nk_cap_loongsonasx_k; return;
218
+ case nk_kernel_dots_packed_k: *m = (m_t)&nk_dots_packed_f64_loongsonasx, *c = nk_cap_loongsonasx_k; return;
219
+ case nk_kernel_dots_symmetric_k:
220
+ *m = (m_t)&nk_dots_symmetric_f64_loongsonasx, *c = nk_cap_loongsonasx_k;
221
+ return;
222
+ case nk_kernel_angulars_packed_k:
223
+ *m = (m_t)&nk_angulars_packed_f64_loongsonasx, *c = nk_cap_loongsonasx_k;
224
+ return;
225
+ case nk_kernel_angulars_symmetric_k:
226
+ *m = (m_t)&nk_angulars_symmetric_f64_loongsonasx, *c = nk_cap_loongsonasx_k;
227
+ return;
228
+ case nk_kernel_euclideans_packed_k:
229
+ *m = (m_t)&nk_euclideans_packed_f64_loongsonasx, *c = nk_cap_loongsonasx_k;
230
+ return;
231
+ case nk_kernel_euclideans_symmetric_k:
232
+ *m = (m_t)&nk_euclideans_symmetric_f64_loongsonasx, *c = nk_cap_loongsonasx_k;
233
+ return;
234
+ default: break;
235
+ }
236
+ #endif
185
237
  #if NK_TARGET_RVV
186
238
  if (v & nk_cap_rvv_k) switch (k) {
187
239
  case nk_kernel_dot_k: *m = (m_t)&nk_dot_f64_rvv, *c = nk_cap_rvv_k; return;
package/c/dispatch_i4.c CHANGED
@@ -52,6 +52,9 @@ void nk_dispatch_i4_find_(nk_capability_t v, nk_kernel_kind_t k, nk_kernel_punne
52
52
  case nk_kernel_euclideans_symmetric_k:
53
53
  *m = (m_t)&nk_euclideans_symmetric_i4_neonsdot, *c = nk_cap_neonsdot_k;
54
54
  return;
55
+ case nk_kernel_angular_k: *m = (m_t)&nk_angular_i4_neonsdot, *c = nk_cap_neonsdot_k; return;
56
+ case nk_kernel_sqeuclidean_k: *m = (m_t)&nk_sqeuclidean_i4_neonsdot, *c = nk_cap_neonsdot_k; return;
57
+ case nk_kernel_euclidean_k: *m = (m_t)&nk_euclidean_i4_neonsdot, *c = nk_cap_neonsdot_k; return;
55
58
  default: break;
56
59
  }
57
60
  #endif