numkong 7.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (294) hide show
  1. package/LICENSE +201 -0
  2. package/README.md +495 -0
  3. package/binding.gyp +540 -0
  4. package/c/dispatch.h +512 -0
  5. package/c/dispatch_bf16.c +389 -0
  6. package/c/dispatch_bf16c.c +52 -0
  7. package/c/dispatch_e2m3.c +263 -0
  8. package/c/dispatch_e3m2.c +243 -0
  9. package/c/dispatch_e4m3.c +276 -0
  10. package/c/dispatch_e5m2.c +272 -0
  11. package/c/dispatch_f16.c +376 -0
  12. package/c/dispatch_f16c.c +58 -0
  13. package/c/dispatch_f32.c +378 -0
  14. package/c/dispatch_f32c.c +99 -0
  15. package/c/dispatch_f64.c +296 -0
  16. package/c/dispatch_f64c.c +98 -0
  17. package/c/dispatch_i16.c +96 -0
  18. package/c/dispatch_i32.c +89 -0
  19. package/c/dispatch_i4.c +150 -0
  20. package/c/dispatch_i64.c +86 -0
  21. package/c/dispatch_i8.c +289 -0
  22. package/c/dispatch_other.c +330 -0
  23. package/c/dispatch_u1.c +148 -0
  24. package/c/dispatch_u16.c +124 -0
  25. package/c/dispatch_u32.c +118 -0
  26. package/c/dispatch_u4.c +150 -0
  27. package/c/dispatch_u64.c +102 -0
  28. package/c/dispatch_u8.c +303 -0
  29. package/c/numkong.c +950 -0
  30. package/include/README.md +573 -0
  31. package/include/module.modulemap +129 -0
  32. package/include/numkong/attention/sapphireamx.h +1361 -0
  33. package/include/numkong/attention/sme.h +2066 -0
  34. package/include/numkong/attention.h +49 -0
  35. package/include/numkong/capabilities.h +748 -0
  36. package/include/numkong/cast/README.md +262 -0
  37. package/include/numkong/cast/haswell.h +975 -0
  38. package/include/numkong/cast/icelake.h +470 -0
  39. package/include/numkong/cast/neon.h +1192 -0
  40. package/include/numkong/cast/rvv.h +1021 -0
  41. package/include/numkong/cast/sapphire.h +262 -0
  42. package/include/numkong/cast/serial.h +2262 -0
  43. package/include/numkong/cast/skylake.h +856 -0
  44. package/include/numkong/cast/v128relaxed.h +180 -0
  45. package/include/numkong/cast.h +230 -0
  46. package/include/numkong/curved/README.md +223 -0
  47. package/include/numkong/curved/genoa.h +182 -0
  48. package/include/numkong/curved/haswell.h +276 -0
  49. package/include/numkong/curved/neon.h +205 -0
  50. package/include/numkong/curved/neonbfdot.h +212 -0
  51. package/include/numkong/curved/neonhalf.h +212 -0
  52. package/include/numkong/curved/rvv.h +305 -0
  53. package/include/numkong/curved/serial.h +207 -0
  54. package/include/numkong/curved/skylake.h +457 -0
  55. package/include/numkong/curved/smef64.h +506 -0
  56. package/include/numkong/curved.h +517 -0
  57. package/include/numkong/curved.hpp +144 -0
  58. package/include/numkong/dot/README.md +425 -0
  59. package/include/numkong/dot/alder.h +563 -0
  60. package/include/numkong/dot/genoa.h +315 -0
  61. package/include/numkong/dot/haswell.h +1688 -0
  62. package/include/numkong/dot/icelake.h +883 -0
  63. package/include/numkong/dot/neon.h +818 -0
  64. package/include/numkong/dot/neonbfdot.h +244 -0
  65. package/include/numkong/dot/neonfhm.h +360 -0
  66. package/include/numkong/dot/neonhalf.h +198 -0
  67. package/include/numkong/dot/neonsdot.h +508 -0
  68. package/include/numkong/dot/rvv.h +714 -0
  69. package/include/numkong/dot/rvvbb.h +72 -0
  70. package/include/numkong/dot/rvvbf16.h +123 -0
  71. package/include/numkong/dot/rvvhalf.h +129 -0
  72. package/include/numkong/dot/sapphire.h +141 -0
  73. package/include/numkong/dot/serial.h +838 -0
  74. package/include/numkong/dot/sierra.h +405 -0
  75. package/include/numkong/dot/skylake.h +1084 -0
  76. package/include/numkong/dot/sve.h +379 -0
  77. package/include/numkong/dot/svebfdot.h +74 -0
  78. package/include/numkong/dot/svehalf.h +123 -0
  79. package/include/numkong/dot/v128relaxed.h +1258 -0
  80. package/include/numkong/dot.h +1070 -0
  81. package/include/numkong/dot.hpp +94 -0
  82. package/include/numkong/dots/README.md +496 -0
  83. package/include/numkong/dots/alder.h +114 -0
  84. package/include/numkong/dots/genoa.h +94 -0
  85. package/include/numkong/dots/haswell.h +295 -0
  86. package/include/numkong/dots/icelake.h +171 -0
  87. package/include/numkong/dots/neon.h +120 -0
  88. package/include/numkong/dots/neonbfdot.h +58 -0
  89. package/include/numkong/dots/neonfhm.h +94 -0
  90. package/include/numkong/dots/neonhalf.h +57 -0
  91. package/include/numkong/dots/neonsdot.h +108 -0
  92. package/include/numkong/dots/rvv.h +2486 -0
  93. package/include/numkong/dots/sapphireamx.h +3973 -0
  94. package/include/numkong/dots/serial.h +2844 -0
  95. package/include/numkong/dots/sierra.h +97 -0
  96. package/include/numkong/dots/skylake.h +196 -0
  97. package/include/numkong/dots/sme.h +5372 -0
  98. package/include/numkong/dots/smebi32.h +461 -0
  99. package/include/numkong/dots/smef64.h +1318 -0
  100. package/include/numkong/dots/smehalf.h +47 -0
  101. package/include/numkong/dots/v128relaxed.h +294 -0
  102. package/include/numkong/dots.h +2804 -0
  103. package/include/numkong/dots.hpp +639 -0
  104. package/include/numkong/each/README.md +469 -0
  105. package/include/numkong/each/haswell.h +1658 -0
  106. package/include/numkong/each/icelake.h +272 -0
  107. package/include/numkong/each/neon.h +1104 -0
  108. package/include/numkong/each/neonbfdot.h +212 -0
  109. package/include/numkong/each/neonhalf.h +410 -0
  110. package/include/numkong/each/rvv.h +1121 -0
  111. package/include/numkong/each/sapphire.h +477 -0
  112. package/include/numkong/each/serial.h +260 -0
  113. package/include/numkong/each/skylake.h +1562 -0
  114. package/include/numkong/each.h +2146 -0
  115. package/include/numkong/each.hpp +434 -0
  116. package/include/numkong/geospatial/README.md +147 -0
  117. package/include/numkong/geospatial/haswell.h +593 -0
  118. package/include/numkong/geospatial/neon.h +571 -0
  119. package/include/numkong/geospatial/rvv.h +701 -0
  120. package/include/numkong/geospatial/serial.h +309 -0
  121. package/include/numkong/geospatial/skylake.h +577 -0
  122. package/include/numkong/geospatial/v128relaxed.h +613 -0
  123. package/include/numkong/geospatial.h +453 -0
  124. package/include/numkong/geospatial.hpp +235 -0
  125. package/include/numkong/matrix.hpp +336 -0
  126. package/include/numkong/maxsim/README.md +187 -0
  127. package/include/numkong/maxsim/alder.h +511 -0
  128. package/include/numkong/maxsim/genoa.h +115 -0
  129. package/include/numkong/maxsim/haswell.h +553 -0
  130. package/include/numkong/maxsim/icelake.h +480 -0
  131. package/include/numkong/maxsim/neonsdot.h +394 -0
  132. package/include/numkong/maxsim/sapphireamx.h +877 -0
  133. package/include/numkong/maxsim/serial.h +490 -0
  134. package/include/numkong/maxsim/sme.h +929 -0
  135. package/include/numkong/maxsim/v128relaxed.h +280 -0
  136. package/include/numkong/maxsim.h +571 -0
  137. package/include/numkong/maxsim.hpp +133 -0
  138. package/include/numkong/mesh/README.md +227 -0
  139. package/include/numkong/mesh/haswell.h +2235 -0
  140. package/include/numkong/mesh/neon.h +1329 -0
  141. package/include/numkong/mesh/neonbfdot.h +842 -0
  142. package/include/numkong/mesh/neonhalf.h +616 -0
  143. package/include/numkong/mesh/rvv.h +916 -0
  144. package/include/numkong/mesh/serial.h +742 -0
  145. package/include/numkong/mesh/skylake.h +1135 -0
  146. package/include/numkong/mesh/v128relaxed.h +1052 -0
  147. package/include/numkong/mesh.h +652 -0
  148. package/include/numkong/mesh.hpp +762 -0
  149. package/include/numkong/numkong.h +78 -0
  150. package/include/numkong/numkong.hpp +57 -0
  151. package/include/numkong/probability/README.md +173 -0
  152. package/include/numkong/probability/haswell.h +267 -0
  153. package/include/numkong/probability/neon.h +225 -0
  154. package/include/numkong/probability/rvv.h +409 -0
  155. package/include/numkong/probability/serial.h +169 -0
  156. package/include/numkong/probability/skylake.h +324 -0
  157. package/include/numkong/probability.h +383 -0
  158. package/include/numkong/probability.hpp +120 -0
  159. package/include/numkong/random.h +50 -0
  160. package/include/numkong/random.hpp +285 -0
  161. package/include/numkong/reduce/README.md +547 -0
  162. package/include/numkong/reduce/alder.h +632 -0
  163. package/include/numkong/reduce/genoa.h +201 -0
  164. package/include/numkong/reduce/haswell.h +3783 -0
  165. package/include/numkong/reduce/icelake.h +549 -0
  166. package/include/numkong/reduce/neon.h +3841 -0
  167. package/include/numkong/reduce/neonbfdot.h +353 -0
  168. package/include/numkong/reduce/neonfhm.h +665 -0
  169. package/include/numkong/reduce/neonhalf.h +157 -0
  170. package/include/numkong/reduce/neonsdot.h +357 -0
  171. package/include/numkong/reduce/rvv.h +3407 -0
  172. package/include/numkong/reduce/serial.h +757 -0
  173. package/include/numkong/reduce/sierra.h +338 -0
  174. package/include/numkong/reduce/skylake.h +3792 -0
  175. package/include/numkong/reduce/v128relaxed.h +2302 -0
  176. package/include/numkong/reduce.h +1597 -0
  177. package/include/numkong/reduce.hpp +633 -0
  178. package/include/numkong/scalar/README.md +89 -0
  179. package/include/numkong/scalar/haswell.h +113 -0
  180. package/include/numkong/scalar/neon.h +122 -0
  181. package/include/numkong/scalar/neonhalf.h +70 -0
  182. package/include/numkong/scalar/rvv.h +211 -0
  183. package/include/numkong/scalar/sapphire.h +63 -0
  184. package/include/numkong/scalar/serial.h +332 -0
  185. package/include/numkong/scalar/v128relaxed.h +56 -0
  186. package/include/numkong/scalar.h +683 -0
  187. package/include/numkong/set/README.md +179 -0
  188. package/include/numkong/set/haswell.h +334 -0
  189. package/include/numkong/set/icelake.h +485 -0
  190. package/include/numkong/set/neon.h +364 -0
  191. package/include/numkong/set/rvv.h +226 -0
  192. package/include/numkong/set/rvvbb.h +117 -0
  193. package/include/numkong/set/serial.h +174 -0
  194. package/include/numkong/set/sve.h +185 -0
  195. package/include/numkong/set/v128relaxed.h +240 -0
  196. package/include/numkong/set.h +457 -0
  197. package/include/numkong/set.hpp +114 -0
  198. package/include/numkong/sets/README.md +149 -0
  199. package/include/numkong/sets/haswell.h +63 -0
  200. package/include/numkong/sets/icelake.h +66 -0
  201. package/include/numkong/sets/neon.h +61 -0
  202. package/include/numkong/sets/serial.h +43 -0
  203. package/include/numkong/sets/smebi32.h +1099 -0
  204. package/include/numkong/sets/v128relaxed.h +58 -0
  205. package/include/numkong/sets.h +339 -0
  206. package/include/numkong/sparse/README.md +156 -0
  207. package/include/numkong/sparse/icelake.h +463 -0
  208. package/include/numkong/sparse/neon.h +288 -0
  209. package/include/numkong/sparse/serial.h +117 -0
  210. package/include/numkong/sparse/sve2.h +507 -0
  211. package/include/numkong/sparse/turin.h +322 -0
  212. package/include/numkong/sparse.h +363 -0
  213. package/include/numkong/sparse.hpp +113 -0
  214. package/include/numkong/spatial/README.md +435 -0
  215. package/include/numkong/spatial/alder.h +607 -0
  216. package/include/numkong/spatial/genoa.h +290 -0
  217. package/include/numkong/spatial/haswell.h +960 -0
  218. package/include/numkong/spatial/icelake.h +586 -0
  219. package/include/numkong/spatial/neon.h +773 -0
  220. package/include/numkong/spatial/neonbfdot.h +165 -0
  221. package/include/numkong/spatial/neonhalf.h +118 -0
  222. package/include/numkong/spatial/neonsdot.h +261 -0
  223. package/include/numkong/spatial/rvv.h +984 -0
  224. package/include/numkong/spatial/rvvbf16.h +123 -0
  225. package/include/numkong/spatial/rvvhalf.h +117 -0
  226. package/include/numkong/spatial/sapphire.h +343 -0
  227. package/include/numkong/spatial/serial.h +346 -0
  228. package/include/numkong/spatial/sierra.h +323 -0
  229. package/include/numkong/spatial/skylake.h +606 -0
  230. package/include/numkong/spatial/sve.h +224 -0
  231. package/include/numkong/spatial/svebfdot.h +122 -0
  232. package/include/numkong/spatial/svehalf.h +109 -0
  233. package/include/numkong/spatial/v128relaxed.h +717 -0
  234. package/include/numkong/spatial.h +1425 -0
  235. package/include/numkong/spatial.hpp +183 -0
  236. package/include/numkong/spatials/README.md +580 -0
  237. package/include/numkong/spatials/alder.h +94 -0
  238. package/include/numkong/spatials/genoa.h +94 -0
  239. package/include/numkong/spatials/haswell.h +219 -0
  240. package/include/numkong/spatials/icelake.h +113 -0
  241. package/include/numkong/spatials/neon.h +109 -0
  242. package/include/numkong/spatials/neonbfdot.h +60 -0
  243. package/include/numkong/spatials/neonfhm.h +92 -0
  244. package/include/numkong/spatials/neonhalf.h +58 -0
  245. package/include/numkong/spatials/neonsdot.h +109 -0
  246. package/include/numkong/spatials/rvv.h +1960 -0
  247. package/include/numkong/spatials/sapphireamx.h +1149 -0
  248. package/include/numkong/spatials/serial.h +226 -0
  249. package/include/numkong/spatials/sierra.h +96 -0
  250. package/include/numkong/spatials/skylake.h +184 -0
  251. package/include/numkong/spatials/sme.h +1901 -0
  252. package/include/numkong/spatials/smef64.h +465 -0
  253. package/include/numkong/spatials/v128relaxed.h +240 -0
  254. package/include/numkong/spatials.h +3021 -0
  255. package/include/numkong/spatials.hpp +508 -0
  256. package/include/numkong/tensor.hpp +1592 -0
  257. package/include/numkong/trigonometry/README.md +184 -0
  258. package/include/numkong/trigonometry/haswell.h +652 -0
  259. package/include/numkong/trigonometry/neon.h +639 -0
  260. package/include/numkong/trigonometry/rvv.h +699 -0
  261. package/include/numkong/trigonometry/serial.h +703 -0
  262. package/include/numkong/trigonometry/skylake.h +721 -0
  263. package/include/numkong/trigonometry/v128relaxed.h +666 -0
  264. package/include/numkong/trigonometry.h +467 -0
  265. package/include/numkong/trigonometry.hpp +166 -0
  266. package/include/numkong/types.h +1384 -0
  267. package/include/numkong/types.hpp +5603 -0
  268. package/include/numkong/vector.hpp +698 -0
  269. package/javascript/README.md +246 -0
  270. package/javascript/dist/cjs/numkong-wasm.d.ts +166 -0
  271. package/javascript/dist/cjs/numkong-wasm.js +617 -0
  272. package/javascript/dist/cjs/numkong.d.ts +343 -0
  273. package/javascript/dist/cjs/numkong.js +523 -0
  274. package/javascript/dist/cjs/package.json +3 -0
  275. package/javascript/dist/cjs/types.d.ts +284 -0
  276. package/javascript/dist/cjs/types.js +653 -0
  277. package/javascript/dist/esm/numkong-wasm.d.ts +166 -0
  278. package/javascript/dist/esm/numkong-wasm.js +595 -0
  279. package/javascript/dist/esm/numkong.d.ts +343 -0
  280. package/javascript/dist/esm/numkong.js +452 -0
  281. package/javascript/dist/esm/package.json +3 -0
  282. package/javascript/dist/esm/types.d.ts +284 -0
  283. package/javascript/dist/esm/types.js +630 -0
  284. package/javascript/dist-package-cjs.json +3 -0
  285. package/javascript/dist-package-esm.json +3 -0
  286. package/javascript/node-gyp-build.d.ts +1 -0
  287. package/javascript/numkong-wasm.ts +756 -0
  288. package/javascript/numkong.c +689 -0
  289. package/javascript/numkong.ts +575 -0
  290. package/javascript/tsconfig-base.json +39 -0
  291. package/javascript/tsconfig-cjs.json +8 -0
  292. package/javascript/tsconfig-esm.json +8 -0
  293. package/javascript/types.ts +674 -0
  294. package/package.json +87 -0
@@ -0,0 +1,260 @@
1
+ /**
2
+ * @brief SWAR-accelerated Elementwise Arithmetic for SIMD-free CPUs.
3
+ * @file include/numkong/each/serial.h
4
+ * @author Ash Vardanian
5
+ * @date December 27, 2025
6
+ *
7
+ * @sa include/numkong/each.h
8
+ */
9
+ #ifndef NK_EACH_SERIAL_H
10
+ #define NK_EACH_SERIAL_H
11
+
12
+ #include "numkong/types.h"
13
+ #include "numkong/cast/serial.h" // `nk_f16_to_f32_serial`
14
+
15
+ #if defined(__cplusplus)
16
+ extern "C" {
17
+ #endif
18
+
19
/**
 *  @brief Generator for `nk_each_scale_<input_type>_serial`.
 *
 *  The emitted function walks `n` elements of `a`, widens each one to `accumulator_type`
 *  through `load_and_convert`, evaluates `alpha * a[i] + beta` in `accumulator_type`,
 *  and writes the value to `result[i]` through `convert_and_store`.
 *  `alpha` and `beta` are passed by pointer and are read once, before the loop.
 */
#define nk_define_each_scale_(input_type, accumulator_type, load_and_convert, convert_and_store) \
    NK_PUBLIC void nk_each_scale_##input_type##_serial(                                          \
        nk_##input_type##_t const *a, nk_size_t n, nk_##accumulator_type##_t const *alpha,       \
        nk_##accumulator_type##_t const *beta, nk_##input_type##_t *result) {                    \
        nk_##accumulator_type##_t const scale = *alpha;                                          \
        nk_##accumulator_type##_t const shift = *beta;                                           \
        for (nk_size_t idx = 0; idx < n; ++idx) {                                                \
            nk_##accumulator_type##_t element, scaled;                                           \
            load_and_convert(a + idx, &element);                                                 \
            scaled = (nk_##accumulator_type##_t)(scale * element + shift);                       \
            convert_and_store(&scaled, result + idx);                                            \
        }                                                                                        \
    }
32
/**
 *  @brief Generator for `nk_each_sum_<input_type>_serial`.
 *
 *  The emitted function widens `a[i]` and `b[i]` to `accumulator_type` through
 *  `load_and_convert`, adds them in `accumulator_type`, and writes the value to
 *  `result[i]` through `convert_and_store`, for every `i` in `[0, n)`.
 */
#define nk_define_each_sum_(input_type, accumulator_type, load_and_convert, convert_and_store)                   \
    NK_PUBLIC void nk_each_sum_##input_type##_serial(nk_##input_type##_t const *a, nk_##input_type##_t const *b, \
                                                     nk_size_t n, nk_##input_type##_t *result) {                 \
        for (nk_size_t idx = 0; idx < n; ++idx) {                                                                \
            nk_##accumulator_type##_t lhs, rhs, total;                                                           \
            load_and_convert(a + idx, &lhs);                                                                     \
            load_and_convert(b + idx, &rhs);                                                                     \
            total = lhs + rhs;                                                                                   \
            convert_and_store(&total, result + idx);                                                             \
        }                                                                                                        \
    }
43
+
44
/**
 *  @brief Generator for `nk_each_blend_<input_type>_serial`.
 *
 *  The emitted function computes `a[i] * alpha + b[i] * beta` in `accumulator_type` for every
 *  `i` in `[0, n)`. Inputs are widened through `load_and_convert` and the value is written to
 *  `result[i]` through `convert_and_store`. `alpha` and `beta` are read once, before the loop.
 *  The two products are kept in separate variables before they are added.
 */
#define nk_define_each_blend_(input_type, accumulator_type, load_and_convert, convert_and_store)                      \
    NK_PUBLIC void nk_each_blend_##input_type##_serial(                                                               \
        nk_##input_type##_t const *a, nk_##input_type##_t const *b, nk_size_t n,                                      \
        nk_##accumulator_type##_t const *alpha, nk_##accumulator_type##_t const *beta, nk_##input_type##_t *result) { \
        nk_##accumulator_type##_t const weight_a = *alpha;                                                            \
        nk_##accumulator_type##_t const weight_b = *beta;                                                             \
        for (nk_size_t idx = 0; idx < n; ++idx) {                                                                     \
            nk_##accumulator_type##_t lhs, rhs, lhs_weighted, rhs_weighted, total;                                    \
            load_and_convert(a + idx, &lhs);                                                                          \
            load_and_convert(b + idx, &rhs);                                                                          \
            lhs_weighted = lhs * weight_a;                                                                            \
            rhs_weighted = rhs * weight_b;                                                                            \
            total = lhs_weighted + rhs_weighted;                                                                      \
            convert_and_store(&total, result + idx);                                                                  \
        }                                                                                                             \
    }
60
+
61
/**
 *  @brief Generator for `nk_each_fma_<input_type>_serial`.
 *
 *  The emitted function computes `a[i] * b[i] * alpha + c[i] * beta` in `accumulator_type`
 *  for every `i` in `[0, n)`. Inputs are widened through `load_and_convert` and the value is
 *  written to `result[i]` through `convert_and_store`. `alpha` and `beta` are read once,
 *  before the loop. The two scaled terms are kept in separate variables before they are added.
 */
#define nk_define_each_fma_(input_type, accumulator_type, load_and_convert, convert_and_store)                        \
    NK_PUBLIC void nk_each_fma_##input_type##_serial(                                                                 \
        nk_##input_type##_t const *a, nk_##input_type##_t const *b, nk_##input_type##_t const *c, nk_size_t n,        \
        nk_##accumulator_type##_t const *alpha, nk_##accumulator_type##_t const *beta, nk_##input_type##_t *result) { \
        nk_##accumulator_type##_t const weight_ab = *alpha;                                                           \
        nk_##accumulator_type##_t const weight_c = *beta;                                                             \
        for (nk_size_t idx = 0; idx < n; ++idx) {                                                                     \
            nk_##accumulator_type##_t lhs, rhs, addend, product_weighted, addend_weighted, total;                     \
            load_and_convert(a + idx, &lhs);                                                                          \
            load_and_convert(b + idx, &rhs);                                                                          \
            load_and_convert(c + idx, &addend);                                                                       \
            product_weighted = lhs * rhs * weight_ab;                                                                 \
            addend_weighted = addend * weight_c;                                                                      \
            total = product_weighted + addend_weighted;                                                               \
            convert_and_store(&total, result + idx);                                                                  \
        }                                                                                                             \
    }
78
+ // Element-wise sum kernels: one `nk_each_sum_<type>_serial` per element type; the second macro argument is the type the addition runs in.
79
+ nk_define_each_sum_(f64, f64, nk_assign_from_to_, nk_assign_from_to_) // nk_each_sum_f64_serial: summed in f64
80
+ nk_define_each_sum_(f32, f32, nk_assign_from_to_, nk_assign_from_to_) // nk_each_sum_f32_serial: summed in f32
81
+ nk_define_each_sum_(f16, f32, nk_f16_to_f32_serial, nk_f32_to_f16_serial) // nk_each_sum_f16_serial: summed in f32
82
+ nk_define_each_sum_(bf16, f32, nk_bf16_to_f32_serial, nk_f32_to_bf16_serial) // nk_each_sum_bf16_serial: summed in f32
83
+ nk_define_each_sum_(e4m3, f32, nk_e4m3_to_f32_serial, nk_f32_to_e4m3_serial) // nk_each_sum_e4m3_serial: summed in f32
84
+ nk_define_each_sum_(e5m2, f32, nk_e5m2_to_f32_serial, nk_f32_to_e5m2_serial) // nk_each_sum_e5m2_serial: summed in f32
85
+ nk_define_each_sum_(e2m3, f32, nk_e2m3_to_f32_serial, nk_f32_to_e2m3_serial) // nk_each_sum_e2m3_serial: summed in f32
86
+ nk_define_each_sum_(e3m2, f32, nk_e3m2_to_f32_serial, nk_f32_to_e3m2_serial) // nk_each_sum_e3m2_serial: summed in f32
87
+ nk_define_each_sum_(i8, i64, nk_assign_from_to_, nk_i64_to_i8_serial) // nk_each_sum_i8_serial: summed in i64, stored via nk_i64_to_i8_serial
88
+ nk_define_each_sum_(u8, i64, nk_assign_from_to_, nk_i64_to_u8_serial) // nk_each_sum_u8_serial: summed in i64, stored via nk_i64_to_u8_serial
89
+ nk_define_each_sum_(i16, i64, nk_assign_from_to_, nk_i64_to_i16_serial) // nk_each_sum_i16_serial: summed in i64, stored via nk_i64_to_i16_serial
90
+ nk_define_each_sum_(u16, i64, nk_assign_from_to_, nk_i64_to_u16_serial) // nk_each_sum_u16_serial: summed in i64, stored via nk_i64_to_u16_serial
91
+ nk_define_each_sum_(i32, i64, nk_assign_from_to_, nk_i64_to_i32_serial) // nk_each_sum_i32_serial: summed in i64, stored via nk_i64_to_i32_serial
92
+ nk_define_each_sum_(u32, i64, nk_assign_from_to_, nk_i64_to_u32_serial) // nk_each_sum_u32_serial: summed in i64, stored via nk_i64_to_u32_serial
93
+ nk_define_each_sum_(i64, i64, nk_assign_from_to_, nk_assign_from_to_) // nk_each_sum_i64_serial: summed in i64, no wider accumulator is used
94
+ nk_define_each_sum_(u64, u64, nk_assign_from_to_, nk_assign_from_to_) // nk_each_sum_u64_serial: summed in u64, no wider accumulator is used
95
+ // Element-wise `alpha * a[i] + beta` kernels: the second macro argument is both the arithmetic type and the type of `alpha`/`beta`.
96
+ nk_define_each_scale_(f64, f64, nk_assign_from_to_, nk_assign_from_to_) // nk_each_scale_f64_serial: f64 arithmetic
97
+ nk_define_each_scale_(f32, f32, nk_assign_from_to_, nk_assign_from_to_) // nk_each_scale_f32_serial: f32 arithmetic
98
+ nk_define_each_scale_(f16, f32, nk_f16_to_f32_serial, nk_f32_to_f16_serial) // nk_each_scale_f16_serial: f32 arithmetic
99
+ nk_define_each_scale_(bf16, f32, nk_bf16_to_f32_serial, nk_f32_to_bf16_serial) // nk_each_scale_bf16_serial: f32 arithmetic
100
+ nk_define_each_scale_(e4m3, f32, nk_e4m3_to_f32_serial, nk_f32_to_e4m3_serial) // nk_each_scale_e4m3_serial: f32 arithmetic
101
+ nk_define_each_scale_(e5m2, f32, nk_e5m2_to_f32_serial, nk_f32_to_e5m2_serial) // nk_each_scale_e5m2_serial: f32 arithmetic
102
+ nk_define_each_scale_(e2m3, f32, nk_e2m3_to_f32_serial, nk_f32_to_e2m3_serial) // nk_each_scale_e2m3_serial: f32 arithmetic
103
+ nk_define_each_scale_(e3m2, f32, nk_e3m2_to_f32_serial, nk_f32_to_e3m2_serial) // nk_each_scale_e3m2_serial: f32 arithmetic
104
+ nk_define_each_scale_(i8, f32, nk_assign_from_to_, nk_f32_to_i8_serial) // nk_each_scale_i8_serial: f32 arithmetic, stored via nk_f32_to_i8_serial
105
+ nk_define_each_scale_(u8, f32, nk_assign_from_to_, nk_f32_to_u8_serial) // nk_each_scale_u8_serial: f32 arithmetic, stored via nk_f32_to_u8_serial
106
+ nk_define_each_scale_(i16, f32, nk_assign_from_to_, nk_f32_to_i16_serial) // nk_each_scale_i16_serial: f32 arithmetic, stored via nk_f32_to_i16_serial
107
+ nk_define_each_scale_(u16, f32, nk_assign_from_to_, nk_f32_to_u16_serial) // nk_each_scale_u16_serial: f32 arithmetic, stored via nk_f32_to_u16_serial
108
+ nk_define_each_scale_(i32, f64, nk_assign_from_to_, nk_f64_to_i32_serial) // nk_each_scale_i32_serial: f64 arithmetic, stored via nk_f64_to_i32_serial
109
+ nk_define_each_scale_(u32, f64, nk_assign_from_to_, nk_f64_to_u32_serial) // nk_each_scale_u32_serial: f64 arithmetic, stored via nk_f64_to_u32_serial
110
+ nk_define_each_scale_(i64, f64, nk_assign_from_to_, nk_f64_to_i64_serial) // nk_each_scale_i64_serial: f64 arithmetic, stored via nk_f64_to_i64_serial
111
+ nk_define_each_scale_(u64, f64, nk_assign_from_to_, nk_f64_to_u64_serial) // nk_each_scale_u64_serial: f64 arithmetic, stored via nk_f64_to_u64_serial
112
+ // Element-wise `a[i] * alpha + b[i] * beta` kernels: the second macro argument is both the arithmetic type and the type of `alpha`/`beta`.
113
+ nk_define_each_blend_(f64, f64, nk_assign_from_to_, nk_assign_from_to_) // nk_each_blend_f64_serial: f64 arithmetic
114
+ nk_define_each_blend_(f32, f32, nk_assign_from_to_, nk_assign_from_to_) // nk_each_blend_f32_serial: f32 arithmetic
115
+ nk_define_each_blend_(f16, f32, nk_f16_to_f32_serial, nk_f32_to_f16_serial) // nk_each_blend_f16_serial: f32 arithmetic
116
+ nk_define_each_blend_(bf16, f32, nk_bf16_to_f32_serial, nk_f32_to_bf16_serial) // nk_each_blend_bf16_serial: f32 arithmetic
117
+ nk_define_each_blend_(e4m3, f32, nk_e4m3_to_f32_serial, nk_f32_to_e4m3_serial) // nk_each_blend_e4m3_serial: f32 arithmetic
118
+ nk_define_each_blend_(e5m2, f32, nk_e5m2_to_f32_serial, nk_f32_to_e5m2_serial) // nk_each_blend_e5m2_serial: f32 arithmetic
119
+ nk_define_each_blend_(e2m3, f32, nk_e2m3_to_f32_serial, nk_f32_to_e2m3_serial) // nk_each_blend_e2m3_serial: f32 arithmetic
120
+ nk_define_each_blend_(e3m2, f32, nk_e3m2_to_f32_serial, nk_f32_to_e3m2_serial) // nk_each_blend_e3m2_serial: f32 arithmetic
121
+ nk_define_each_blend_(i8, f32, nk_assign_from_to_, nk_f32_to_i8_serial) // nk_each_blend_i8_serial: f32 arithmetic, stored via nk_f32_to_i8_serial
122
+ nk_define_each_blend_(u8, f32, nk_assign_from_to_, nk_f32_to_u8_serial) // nk_each_blend_u8_serial: f32 arithmetic, stored via nk_f32_to_u8_serial
123
+ nk_define_each_blend_(i16, f32, nk_assign_from_to_, nk_f32_to_i16_serial) // nk_each_blend_i16_serial: f32 arithmetic, stored via nk_f32_to_i16_serial
124
+ nk_define_each_blend_(u16, f32, nk_assign_from_to_, nk_f32_to_u16_serial) // nk_each_blend_u16_serial: f32 arithmetic, stored via nk_f32_to_u16_serial
125
+ nk_define_each_blend_(i32, f64, nk_assign_from_to_, nk_f64_to_i32_serial) // nk_each_blend_i32_serial: f64 arithmetic, stored via nk_f64_to_i32_serial
126
+ nk_define_each_blend_(u32, f64, nk_assign_from_to_, nk_f64_to_u32_serial) // nk_each_blend_u32_serial: f64 arithmetic, stored via nk_f64_to_u32_serial
127
+ nk_define_each_blend_(i64, f64, nk_assign_from_to_, nk_f64_to_i64_serial) // nk_each_blend_i64_serial: f64 arithmetic, stored via nk_f64_to_i64_serial
128
+ nk_define_each_blend_(u64, f64, nk_assign_from_to_, nk_f64_to_u64_serial) // nk_each_blend_u64_serial: f64 arithmetic, stored via nk_f64_to_u64_serial
129
+ // Element-wise `a[i] * b[i] * alpha + c[i] * beta` kernels: the second macro argument is both the arithmetic type and the type of `alpha`/`beta`.
130
+ nk_define_each_fma_(f64, f64, nk_assign_from_to_, nk_assign_from_to_) // nk_each_fma_f64_serial: f64 arithmetic
131
+ nk_define_each_fma_(f32, f32, nk_assign_from_to_, nk_assign_from_to_) // nk_each_fma_f32_serial: f32 arithmetic
132
+ nk_define_each_fma_(f16, f32, nk_f16_to_f32_serial, nk_f32_to_f16_serial) // nk_each_fma_f16_serial: f32 arithmetic
133
+ nk_define_each_fma_(bf16, f32, nk_bf16_to_f32_serial, nk_f32_to_bf16_serial) // nk_each_fma_bf16_serial: f32 arithmetic
134
+ nk_define_each_fma_(e4m3, f32, nk_e4m3_to_f32_serial, nk_f32_to_e4m3_serial) // nk_each_fma_e4m3_serial: f32 arithmetic
135
+ nk_define_each_fma_(e5m2, f32, nk_e5m2_to_f32_serial, nk_f32_to_e5m2_serial) // nk_each_fma_e5m2_serial: f32 arithmetic
136
+ nk_define_each_fma_(e2m3, f32, nk_e2m3_to_f32_serial, nk_f32_to_e2m3_serial) // nk_each_fma_e2m3_serial: f32 arithmetic
137
+ nk_define_each_fma_(e3m2, f32, nk_e3m2_to_f32_serial, nk_f32_to_e3m2_serial) // nk_each_fma_e3m2_serial: f32 arithmetic
138
+ nk_define_each_fma_(i8, f32, nk_assign_from_to_, nk_f32_to_i8_serial) // nk_each_fma_i8_serial: f32 arithmetic, stored via nk_f32_to_i8_serial
139
+ nk_define_each_fma_(u8, f32, nk_assign_from_to_, nk_f32_to_u8_serial) // nk_each_fma_u8_serial: f32 arithmetic, stored via nk_f32_to_u8_serial
140
+ nk_define_each_fma_(i16, f32, nk_assign_from_to_, nk_f32_to_i16_serial) // nk_each_fma_i16_serial: f32 arithmetic, stored via nk_f32_to_i16_serial
141
+ nk_define_each_fma_(u16, f32, nk_assign_from_to_, nk_f32_to_u16_serial) // nk_each_fma_u16_serial: f32 arithmetic, stored via nk_f32_to_u16_serial
142
+ nk_define_each_fma_(i32, f64, nk_assign_from_to_, nk_f64_to_i32_serial) // nk_each_fma_i32_serial: f64 arithmetic, stored via nk_f64_to_i32_serial
143
+ nk_define_each_fma_(u32, f64, nk_assign_from_to_, nk_f64_to_u32_serial) // nk_each_fma_u32_serial: f64 arithmetic, stored via nk_f64_to_u32_serial
144
+ nk_define_each_fma_(i64, f64, nk_assign_from_to_, nk_f64_to_i64_serial) // nk_each_fma_i64_serial: f64 arithmetic, stored via nk_f64_to_i64_serial
145
+ nk_define_each_fma_(u64, f64, nk_assign_from_to_, nk_f64_to_u64_serial) // nk_each_fma_u64_serial: f64 arithmetic, stored via nk_f64_to_u64_serial
146
+ // All kernels above have been emitted; remove the four generator macros so they are not visible past this point.
147
+ #undef nk_define_each_scale_
148
+ #undef nk_define_each_sum_
149
+ #undef nk_define_each_blend_
150
+ #undef nk_define_each_fma_
151
+
152
+ NK_DYNAMIC void nk_each_sum_f32(nk_f32_t const *a, nk_f32_t const *b, nk_size_t n, nk_f32_t *result);
153
+
154
+ NK_PUBLIC void nk_each_sum_f32c_serial(nk_f32c_t const *a, nk_f32c_t const *b, nk_size_t n, nk_f32c_t *result) {
155
+ nk_each_sum_f32((nk_f32_t const *)a, (nk_f32_t const *)b, 2 * n, (nk_f32_t *)result);
156
+ }
157
+
158
+ NK_PUBLIC void nk_each_sum_f64c_serial(nk_f64c_t const *a, nk_f64c_t const *b, nk_size_t n, nk_f64c_t *result) {
159
+ nk_each_sum_f64((nk_f64_t const *)a, (nk_f64_t const *)b, 2 * n, (nk_f64_t *)result);
160
+ }
161
+
162
+ NK_DYNAMIC void nk_each_sum_f64(nk_f64_t const *a, nk_f64_t const *b, nk_size_t n, nk_f64_t *result);
163
+
164
+ NK_PUBLIC void nk_each_scale_f32c_serial(nk_f32c_t const *a, nk_size_t n, nk_f32c_t const *alpha, nk_f32c_t const *beta,
165
+ nk_f32c_t *result) {
166
+ nk_f32_t alpha_real = alpha->real, alpha_imag = alpha->imag;
167
+ nk_f32_t beta_real = beta->real, beta_imag = beta->imag;
168
+ for (nk_size_t i = 0; i != n; ++i) {
169
+ nk_f32_t a_real = a[i].real, a_imag = a[i].imag;
170
+ result[i].real = alpha_real * a_real - alpha_imag * a_imag + beta_real;
171
+ result[i].imag = alpha_real * a_imag + alpha_imag * a_real + beta_imag;
172
+ }
173
+ }
174
+
175
+ NK_PUBLIC void nk_each_scale_f64c_serial(nk_f64c_t const *a, nk_size_t n, nk_f64c_t const *alpha, nk_f64c_t const *beta,
176
+ nk_f64c_t *result) {
177
+ nk_f64_t alpha_real = alpha->real, alpha_imag = alpha->imag;
178
+ nk_f64_t beta_real = beta->real, beta_imag = beta->imag;
179
+ for (nk_size_t i = 0; i != n; ++i) {
180
+ nk_f64_t a_real = a[i].real, a_imag = a[i].imag;
181
+ result[i].real = alpha_real * a_real - alpha_imag * a_imag + beta_real;
182
+ result[i].imag = alpha_real * a_imag + alpha_imag * a_real + beta_imag;
183
+ }
184
+ }
185
+
186
+ NK_PUBLIC void nk_each_blend_f32c_serial(nk_f32c_t const *a, nk_f32c_t const *b, nk_size_t n, nk_f32c_t const *alpha,
187
+ nk_f32c_t const *beta, nk_f32c_t *result) {
188
+ nk_f32_t alpha_real = alpha->real, alpha_imag = alpha->imag;
189
+ nk_f32_t beta_real = beta->real, beta_imag = beta->imag;
190
+ for (nk_size_t i = 0; i != n; ++i) {
191
+ nk_f32_t a_real = a[i].real, a_imag = a[i].imag;
192
+ nk_f32_t b_real = b[i].real, b_imag = b[i].imag;
193
+ nk_f32_t alpha_a_real = alpha_real * a_real - alpha_imag * a_imag;
194
+ nk_f32_t alpha_a_imag = alpha_real * a_imag + alpha_imag * a_real;
195
+ nk_f32_t beta_b_real = beta_real * b_real - beta_imag * b_imag;
196
+ nk_f32_t beta_b_imag = beta_real * b_imag + beta_imag * b_real;
197
+ result[i].real = alpha_a_real + beta_b_real;
198
+ result[i].imag = alpha_a_imag + beta_b_imag;
199
+ }
200
+ }
201
+
202
+ NK_PUBLIC void nk_each_blend_f64c_serial(nk_f64c_t const *a, nk_f64c_t const *b, nk_size_t n, nk_f64c_t const *alpha,
203
+ nk_f64c_t const *beta, nk_f64c_t *result) {
204
+ nk_f64_t alpha_real = alpha->real, alpha_imag = alpha->imag;
205
+ nk_f64_t beta_real = beta->real, beta_imag = beta->imag;
206
+ for (nk_size_t i = 0; i != n; ++i) {
207
+ nk_f64_t a_real = a[i].real, a_imag = a[i].imag;
208
+ nk_f64_t b_real = b[i].real, b_imag = b[i].imag;
209
+ nk_f64_t alpha_a_real = alpha_real * a_real - alpha_imag * a_imag;
210
+ nk_f64_t alpha_a_imag = alpha_real * a_imag + alpha_imag * a_real;
211
+ nk_f64_t beta_b_real = beta_real * b_real - beta_imag * b_imag;
212
+ nk_f64_t beta_b_imag = beta_real * b_imag + beta_imag * b_real;
213
+ result[i].real = alpha_a_real + beta_b_real;
214
+ result[i].imag = alpha_a_imag + beta_b_imag;
215
+ }
216
+ }
217
+
218
+ NK_PUBLIC void nk_each_fma_f32c_serial(nk_f32c_t const *a, nk_f32c_t const *b, nk_f32c_t const *c, nk_size_t n,
219
+ nk_f32c_t const *alpha, nk_f32c_t const *beta, nk_f32c_t *result) {
220
+ nk_f32_t alpha_real = alpha->real, alpha_imag = alpha->imag;
221
+ nk_f32_t beta_real = beta->real, beta_imag = beta->imag;
222
+ for (nk_size_t i = 0; i != n; ++i) {
223
+ nk_f32_t a_real = a[i].real, a_imag = a[i].imag;
224
+ nk_f32_t b_real = b[i].real, b_imag = b[i].imag;
225
+ nk_f32_t c_real = c[i].real, c_imag = c[i].imag;
226
+ nk_f32_t product_real = a_real * b_real - a_imag * b_imag;
227
+ nk_f32_t product_imag = a_real * b_imag + a_imag * b_real;
228
+ nk_f32_t alpha_product_real = alpha_real * product_real - alpha_imag * product_imag;
229
+ nk_f32_t alpha_product_imag = alpha_real * product_imag + alpha_imag * product_real;
230
+ nk_f32_t beta_c_real = beta_real * c_real - beta_imag * c_imag;
231
+ nk_f32_t beta_c_imag = beta_real * c_imag + beta_imag * c_real;
232
+ result[i].real = alpha_product_real + beta_c_real;
233
+ result[i].imag = alpha_product_imag + beta_c_imag;
234
+ }
235
+ }
236
+
237
+ NK_PUBLIC void nk_each_fma_f64c_serial(nk_f64c_t const *a, nk_f64c_t const *b, nk_f64c_t const *c, nk_size_t n,
238
+ nk_f64c_t const *alpha, nk_f64c_t const *beta, nk_f64c_t *result) {
239
+ nk_f64_t alpha_real = alpha->real, alpha_imag = alpha->imag;
240
+ nk_f64_t beta_real = beta->real, beta_imag = beta->imag;
241
+ for (nk_size_t i = 0; i != n; ++i) {
242
+ nk_f64_t a_real = a[i].real, a_imag = a[i].imag;
243
+ nk_f64_t b_real = b[i].real, b_imag = b[i].imag;
244
+ nk_f64_t c_real = c[i].real, c_imag = c[i].imag;
245
+ nk_f64_t product_real = a_real * b_real - a_imag * b_imag;
246
+ nk_f64_t product_imag = a_real * b_imag + a_imag * b_real;
247
+ nk_f64_t alpha_product_real = alpha_real * product_real - alpha_imag * product_imag;
248
+ nk_f64_t alpha_product_imag = alpha_real * product_imag + alpha_imag * product_real;
249
+ nk_f64_t beta_c_real = beta_real * c_real - beta_imag * c_imag;
250
+ nk_f64_t beta_c_imag = beta_real * c_imag + beta_imag * c_real;
251
+ result[i].real = alpha_product_real + beta_c_real;
252
+ result[i].imag = alpha_product_imag + beta_c_imag;
253
+ }
254
+ }
255
+
256
+ #if defined(__cplusplus)
257
+ } // extern "C"
258
+ #endif
259
+
260
+ #endif // NK_EACH_SERIAL_H