numkong 7.0.0 → 7.4.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (315) hide show
  1. package/README.md +239 -122
  2. package/binding.gyp +25 -491
  3. package/c/dispatch_bf16.c +59 -1
  4. package/c/dispatch_e2m3.c +41 -8
  5. package/c/dispatch_e3m2.c +49 -8
  6. package/c/dispatch_e4m3.c +51 -9
  7. package/c/dispatch_e5m2.c +45 -1
  8. package/c/dispatch_f16.c +79 -26
  9. package/c/dispatch_f16c.c +5 -5
  10. package/c/dispatch_f32.c +56 -0
  11. package/c/dispatch_f64.c +52 -0
  12. package/c/dispatch_i4.c +3 -0
  13. package/c/dispatch_i8.c +62 -3
  14. package/c/dispatch_other.c +18 -0
  15. package/c/dispatch_u1.c +54 -9
  16. package/c/dispatch_u4.c +3 -0
  17. package/c/dispatch_u8.c +64 -3
  18. package/c/numkong.c +3 -0
  19. package/include/README.md +79 -9
  20. package/include/numkong/attention/sapphireamx.h +278 -276
  21. package/include/numkong/attention/sme.h +983 -977
  22. package/include/numkong/attention.h +1 -1
  23. package/include/numkong/capabilities.h +289 -94
  24. package/include/numkong/cast/README.md +40 -40
  25. package/include/numkong/cast/diamond.h +64 -0
  26. package/include/numkong/cast/haswell.h +42 -194
  27. package/include/numkong/cast/icelake.h +42 -37
  28. package/include/numkong/cast/loongsonasx.h +252 -0
  29. package/include/numkong/cast/neon.h +216 -249
  30. package/include/numkong/cast/powervsx.h +449 -0
  31. package/include/numkong/cast/rvv.h +223 -274
  32. package/include/numkong/cast/sapphire.h +18 -18
  33. package/include/numkong/cast/serial.h +1018 -944
  34. package/include/numkong/cast/skylake.h +82 -23
  35. package/include/numkong/cast/v128relaxed.h +462 -105
  36. package/include/numkong/cast.h +24 -0
  37. package/include/numkong/cast.hpp +44 -0
  38. package/include/numkong/curved/README.md +17 -17
  39. package/include/numkong/curved/neon.h +131 -7
  40. package/include/numkong/curved/neonbfdot.h +6 -7
  41. package/include/numkong/curved/rvv.h +26 -26
  42. package/include/numkong/curved/smef64.h +186 -182
  43. package/include/numkong/curved.h +14 -18
  44. package/include/numkong/dot/README.md +154 -137
  45. package/include/numkong/dot/alder.h +43 -43
  46. package/include/numkong/dot/diamond.h +158 -0
  47. package/include/numkong/dot/genoa.h +4 -30
  48. package/include/numkong/dot/haswell.h +215 -180
  49. package/include/numkong/dot/icelake.h +190 -76
  50. package/include/numkong/dot/loongsonasx.h +671 -0
  51. package/include/numkong/dot/neon.h +124 -73
  52. package/include/numkong/dot/neonbfdot.h +11 -12
  53. package/include/numkong/dot/neonfhm.h +44 -46
  54. package/include/numkong/dot/neonfp8.h +323 -0
  55. package/include/numkong/dot/neonsdot.h +190 -76
  56. package/include/numkong/dot/powervsx.h +752 -0
  57. package/include/numkong/dot/rvv.h +92 -84
  58. package/include/numkong/dot/rvvbf16.h +12 -12
  59. package/include/numkong/dot/rvvhalf.h +12 -12
  60. package/include/numkong/dot/sapphire.h +4 -4
  61. package/include/numkong/dot/serial.h +66 -30
  62. package/include/numkong/dot/sierra.h +31 -31
  63. package/include/numkong/dot/skylake.h +142 -110
  64. package/include/numkong/dot/sve.h +217 -177
  65. package/include/numkong/dot/svebfdot.h +10 -10
  66. package/include/numkong/dot/svehalf.h +85 -41
  67. package/include/numkong/dot/svesdot.h +89 -0
  68. package/include/numkong/dot/v128relaxed.h +124 -89
  69. package/include/numkong/dot.h +114 -48
  70. package/include/numkong/dots/README.md +203 -203
  71. package/include/numkong/dots/alder.h +12 -9
  72. package/include/numkong/dots/diamond.h +86 -0
  73. package/include/numkong/dots/genoa.h +10 -4
  74. package/include/numkong/dots/haswell.h +63 -48
  75. package/include/numkong/dots/icelake.h +27 -18
  76. package/include/numkong/dots/loongsonasx.h +176 -0
  77. package/include/numkong/dots/neon.h +14 -11
  78. package/include/numkong/dots/neonbfdot.h +4 -3
  79. package/include/numkong/dots/neonfhm.h +11 -9
  80. package/include/numkong/dots/neonfp8.h +99 -0
  81. package/include/numkong/dots/neonsdot.h +48 -12
  82. package/include/numkong/dots/powervsx.h +194 -0
  83. package/include/numkong/dots/rvv.h +451 -344
  84. package/include/numkong/dots/sapphireamx.h +1028 -984
  85. package/include/numkong/dots/serial.h +213 -197
  86. package/include/numkong/dots/sierra.h +10 -7
  87. package/include/numkong/dots/skylake.h +47 -36
  88. package/include/numkong/dots/sme.h +2001 -2364
  89. package/include/numkong/dots/smebi32.h +175 -162
  90. package/include/numkong/dots/smef64.h +328 -323
  91. package/include/numkong/dots/v128relaxed.h +64 -41
  92. package/include/numkong/dots.h +573 -293
  93. package/include/numkong/dots.hpp +45 -43
  94. package/include/numkong/each/README.md +133 -137
  95. package/include/numkong/each/haswell.h +6 -6
  96. package/include/numkong/each/icelake.h +7 -7
  97. package/include/numkong/each/neon.h +76 -42
  98. package/include/numkong/each/neonbfdot.h +11 -12
  99. package/include/numkong/each/neonhalf.h +24 -116
  100. package/include/numkong/each/rvv.h +28 -28
  101. package/include/numkong/each/sapphire.h +27 -161
  102. package/include/numkong/each/serial.h +6 -6
  103. package/include/numkong/each/skylake.h +7 -7
  104. package/include/numkong/each/v128relaxed.h +562 -0
  105. package/include/numkong/each.h +148 -62
  106. package/include/numkong/each.hpp +2 -2
  107. package/include/numkong/geospatial/README.md +18 -18
  108. package/include/numkong/geospatial/haswell.h +365 -325
  109. package/include/numkong/geospatial/neon.h +350 -306
  110. package/include/numkong/geospatial/rvv.h +4 -4
  111. package/include/numkong/geospatial/skylake.h +376 -340
  112. package/include/numkong/geospatial/v128relaxed.h +366 -327
  113. package/include/numkong/geospatial.h +17 -17
  114. package/include/numkong/matrix.hpp +4 -4
  115. package/include/numkong/maxsim/README.md +14 -14
  116. package/include/numkong/maxsim/alder.h +6 -6
  117. package/include/numkong/maxsim/genoa.h +4 -4
  118. package/include/numkong/maxsim/haswell.h +6 -6
  119. package/include/numkong/maxsim/icelake.h +18 -18
  120. package/include/numkong/maxsim/neonsdot.h +21 -21
  121. package/include/numkong/maxsim/sapphireamx.h +14 -14
  122. package/include/numkong/maxsim/serial.h +6 -6
  123. package/include/numkong/maxsim/sme.h +221 -196
  124. package/include/numkong/maxsim/v128relaxed.h +6 -6
  125. package/include/numkong/mesh/README.md +62 -56
  126. package/include/numkong/mesh/haswell.h +339 -464
  127. package/include/numkong/mesh/neon.h +1100 -519
  128. package/include/numkong/mesh/neonbfdot.h +36 -68
  129. package/include/numkong/mesh/rvv.h +530 -435
  130. package/include/numkong/mesh/serial.h +75 -91
  131. package/include/numkong/mesh/skylake.h +1627 -302
  132. package/include/numkong/mesh/v128relaxed.h +443 -330
  133. package/include/numkong/mesh.h +63 -49
  134. package/include/numkong/mesh.hpp +4 -4
  135. package/include/numkong/numkong.h +3 -3
  136. package/include/numkong/numkong.hpp +1 -0
  137. package/include/numkong/probability/README.md +23 -19
  138. package/include/numkong/probability/neon.h +82 -52
  139. package/include/numkong/probability/rvv.h +28 -23
  140. package/include/numkong/probability/serial.h +51 -39
  141. package/include/numkong/probability.h +20 -23
  142. package/include/numkong/random.h +1 -1
  143. package/include/numkong/reduce/README.md +143 -138
  144. package/include/numkong/reduce/alder.h +81 -77
  145. package/include/numkong/reduce/haswell.h +222 -220
  146. package/include/numkong/reduce/neon.h +629 -519
  147. package/include/numkong/reduce/neonbfdot.h +7 -218
  148. package/include/numkong/reduce/neonfhm.h +9 -381
  149. package/include/numkong/reduce/neonsdot.h +9 -9
  150. package/include/numkong/reduce/rvv.h +928 -802
  151. package/include/numkong/reduce/serial.h +23 -27
  152. package/include/numkong/reduce/sierra.h +20 -20
  153. package/include/numkong/reduce/skylake.h +326 -324
  154. package/include/numkong/reduce/v128relaxed.h +52 -52
  155. package/include/numkong/reduce.h +4 -23
  156. package/include/numkong/reduce.hpp +156 -11
  157. package/include/numkong/scalar/README.md +6 -6
  158. package/include/numkong/scalar/haswell.h +26 -17
  159. package/include/numkong/scalar/loongsonasx.h +74 -0
  160. package/include/numkong/scalar/neon.h +9 -9
  161. package/include/numkong/scalar/powervsx.h +96 -0
  162. package/include/numkong/scalar/rvv.h +2 -2
  163. package/include/numkong/scalar/sapphire.h +21 -10
  164. package/include/numkong/scalar/serial.h +21 -21
  165. package/include/numkong/scalar.h +13 -0
  166. package/include/numkong/set/README.md +28 -28
  167. package/include/numkong/set/haswell.h +12 -12
  168. package/include/numkong/set/icelake.h +14 -14
  169. package/include/numkong/set/loongsonasx.h +181 -0
  170. package/include/numkong/set/neon.h +17 -18
  171. package/include/numkong/set/powervsx.h +326 -0
  172. package/include/numkong/set/rvv.h +4 -4
  173. package/include/numkong/set/serial.h +6 -6
  174. package/include/numkong/set/sve.h +60 -59
  175. package/include/numkong/set/v128relaxed.h +6 -6
  176. package/include/numkong/set.h +21 -7
  177. package/include/numkong/sets/README.md +26 -26
  178. package/include/numkong/sets/loongsonasx.h +52 -0
  179. package/include/numkong/sets/powervsx.h +65 -0
  180. package/include/numkong/sets/smebi32.h +395 -364
  181. package/include/numkong/sets.h +83 -40
  182. package/include/numkong/sparse/README.md +4 -4
  183. package/include/numkong/sparse/icelake.h +101 -101
  184. package/include/numkong/sparse/serial.h +1 -1
  185. package/include/numkong/sparse/sve2.h +137 -141
  186. package/include/numkong/sparse/turin.h +12 -12
  187. package/include/numkong/sparse.h +10 -10
  188. package/include/numkong/spatial/README.md +230 -226
  189. package/include/numkong/spatial/alder.h +113 -116
  190. package/include/numkong/spatial/diamond.h +240 -0
  191. package/include/numkong/spatial/genoa.h +0 -68
  192. package/include/numkong/spatial/haswell.h +74 -55
  193. package/include/numkong/spatial/icelake.h +539 -58
  194. package/include/numkong/spatial/loongsonasx.h +483 -0
  195. package/include/numkong/spatial/neon.h +125 -52
  196. package/include/numkong/spatial/neonbfdot.h +8 -9
  197. package/include/numkong/spatial/neonfp8.h +258 -0
  198. package/include/numkong/spatial/neonsdot.h +180 -12
  199. package/include/numkong/spatial/powervsx.h +738 -0
  200. package/include/numkong/spatial/rvv.h +146 -139
  201. package/include/numkong/spatial/rvvbf16.h +17 -12
  202. package/include/numkong/spatial/rvvhalf.h +13 -10
  203. package/include/numkong/spatial/serial.h +13 -12
  204. package/include/numkong/spatial/sierra.h +232 -39
  205. package/include/numkong/spatial/skylake.h +73 -74
  206. package/include/numkong/spatial/sve.h +93 -72
  207. package/include/numkong/spatial/svebfdot.h +29 -29
  208. package/include/numkong/spatial/svehalf.h +52 -26
  209. package/include/numkong/spatial/svesdot.h +142 -0
  210. package/include/numkong/spatial/v128relaxed.h +293 -41
  211. package/include/numkong/spatial.h +338 -82
  212. package/include/numkong/spatials/README.md +194 -194
  213. package/include/numkong/spatials/diamond.h +82 -0
  214. package/include/numkong/spatials/haswell.h +2 -2
  215. package/include/numkong/spatials/loongsonasx.h +153 -0
  216. package/include/numkong/spatials/neonfp8.h +111 -0
  217. package/include/numkong/spatials/neonsdot.h +34 -0
  218. package/include/numkong/spatials/powervsx.h +153 -0
  219. package/include/numkong/spatials/rvv.h +259 -243
  220. package/include/numkong/spatials/sapphireamx.h +173 -173
  221. package/include/numkong/spatials/serial.h +2 -2
  222. package/include/numkong/spatials/skylake.h +2 -2
  223. package/include/numkong/spatials/sme.h +590 -605
  224. package/include/numkong/spatials/smef64.h +139 -130
  225. package/include/numkong/spatials/v128relaxed.h +2 -2
  226. package/include/numkong/spatials.h +820 -500
  227. package/include/numkong/spatials.hpp +49 -48
  228. package/include/numkong/tensor.hpp +406 -17
  229. package/include/numkong/trigonometry/README.md +19 -19
  230. package/include/numkong/trigonometry/haswell.h +402 -401
  231. package/include/numkong/trigonometry/neon.h +386 -387
  232. package/include/numkong/trigonometry/rvv.h +52 -51
  233. package/include/numkong/trigonometry/serial.h +13 -13
  234. package/include/numkong/trigonometry/skylake.h +373 -369
  235. package/include/numkong/trigonometry/v128relaxed.h +375 -374
  236. package/include/numkong/trigonometry.h +13 -13
  237. package/include/numkong/trigonometry.hpp +2 -2
  238. package/include/numkong/types.h +287 -49
  239. package/include/numkong/types.hpp +436 -12
  240. package/include/numkong/vector.hpp +82 -14
  241. package/javascript/dist/cjs/numkong-wasm.js +6 -12
  242. package/javascript/dist/cjs/numkong.d.ts +7 -1
  243. package/javascript/dist/cjs/numkong.js +37 -11
  244. package/javascript/dist/cjs/types.d.ts +9 -0
  245. package/javascript/dist/cjs/types.js +96 -0
  246. package/javascript/dist/esm/numkong-browser.d.ts +14 -0
  247. package/javascript/dist/esm/numkong-browser.js +23 -0
  248. package/javascript/dist/esm/numkong-wasm.js +6 -12
  249. package/javascript/dist/esm/numkong.d.ts +7 -1
  250. package/javascript/dist/esm/numkong.js +37 -11
  251. package/javascript/dist/esm/types.d.ts +9 -0
  252. package/javascript/dist/esm/types.js +96 -0
  253. package/javascript/node-gyp-build.d.ts +4 -1
  254. package/javascript/numkong-browser.ts +40 -0
  255. package/javascript/numkong-wasm.ts +7 -13
  256. package/javascript/numkong.c +5 -26
  257. package/javascript/numkong.ts +36 -11
  258. package/javascript/tsconfig-base.json +1 -0
  259. package/javascript/tsconfig-cjs.json +6 -1
  260. package/javascript/types.ts +110 -0
  261. package/numkong.gypi +101 -0
  262. package/package.json +34 -13
  263. package/probes/arm_neon.c +8 -0
  264. package/probes/arm_neon_bfdot.c +9 -0
  265. package/probes/arm_neon_fhm.c +9 -0
  266. package/probes/arm_neon_half.c +8 -0
  267. package/probes/arm_neon_sdot.c +9 -0
  268. package/probes/arm_neonfp8.c +9 -0
  269. package/probes/arm_sme.c +16 -0
  270. package/probes/arm_sme2.c +16 -0
  271. package/probes/arm_sme2p1.c +16 -0
  272. package/probes/arm_sme_bf16.c +16 -0
  273. package/probes/arm_sme_bi32.c +16 -0
  274. package/probes/arm_sme_f64.c +16 -0
  275. package/probes/arm_sme_fa64.c +14 -0
  276. package/probes/arm_sme_half.c +16 -0
  277. package/probes/arm_sme_lut2.c +15 -0
  278. package/probes/arm_sve.c +18 -0
  279. package/probes/arm_sve2.c +20 -0
  280. package/probes/arm_sve2p1.c +18 -0
  281. package/probes/arm_sve_bfdot.c +20 -0
  282. package/probes/arm_sve_half.c +18 -0
  283. package/probes/arm_sve_sdot.c +21 -0
  284. package/probes/loongarch_lasx.c +12 -0
  285. package/probes/power_vsx.c +12 -0
  286. package/probes/probe.js +127 -0
  287. package/probes/riscv_rvv.c +14 -0
  288. package/probes/riscv_rvv_bb.c +15 -0
  289. package/probes/riscv_rvv_bf16.c +17 -0
  290. package/probes/riscv_rvv_half.c +14 -0
  291. package/probes/wasm_v128relaxed.c +11 -0
  292. package/probes/x86_alder.c +17 -0
  293. package/probes/x86_diamond.c +17 -0
  294. package/probes/x86_genoa.c +17 -0
  295. package/probes/x86_graniteamx.c +19 -0
  296. package/probes/x86_haswell.c +11 -0
  297. package/probes/x86_icelake.c +17 -0
  298. package/probes/x86_sapphire.c +16 -0
  299. package/probes/x86_sapphireamx.c +18 -0
  300. package/probes/x86_sierra.c +17 -0
  301. package/probes/x86_skylake.c +15 -0
  302. package/probes/x86_turin.c +17 -0
  303. package/wasm/numkong-emscripten.js +2 -0
  304. package/wasm/numkong.d.ts +14 -0
  305. package/wasm/numkong.js +1124 -0
  306. package/wasm/numkong.wasm +0 -0
  307. package/include/numkong/curved/neonhalf.h +0 -212
  308. package/include/numkong/dot/neonhalf.h +0 -198
  309. package/include/numkong/dots/neonhalf.h +0 -57
  310. package/include/numkong/mesh/neonhalf.h +0 -616
  311. package/include/numkong/reduce/neonhalf.h +0 -157
  312. package/include/numkong/spatial/neonhalf.h +0 -118
  313. package/include/numkong/spatial/sapphire.h +0 -343
  314. package/include/numkong/spatials/neonhalf.h +0 -58
  315. package/javascript/README.md +0 -246
@@ -0,0 +1,21 @@
1
+ /* NumKong ISA probe: SVE I8 signed-dot (FEAT_SVEDot) */
2
+ #if defined(_WIN32)
3
+ #error "SVE/SME not supported on Windows ARM"
4
+ #endif
5
+
6
+ #if defined(__APPLE__) && defined(__aarch64__)
7
+ #error "SVE not available on Apple Silicon"
8
+ #endif
9
+
10
+ #if !defined(__ARM_FEATURE_SVE)
11
+ #error "Feature not available"
12
+ #endif
13
+ #include <arm_sve.h>
14
+ int test_svesdot(void) {
15
+ svint32_t acc = svdup_s32(0);
16
+ svint8_t a = svdup_s8(1);
17
+ svint8_t b = svdup_s8(1);
18
+ acc = svdot_s32(acc, a, b);
19
+ return (int)svaddv_s32(svptrue_b32(), acc) >= 0 ? 0 : 1;
20
+ }
21
+ int main(void) { return test_svesdot(); }
@@ -0,0 +1,12 @@
1
+ /* NumKong ISA probe: LoongArch LASX (256-bit SIMD) */
2
+ #if !defined(__loongarch_asx)
3
+ #error "Feature not available"
4
+ #endif
5
+ #include <lasxintrin.h>
6
+ int main(void) {
7
+ __m256i a = __lasx_xvreplgr2vr_w(1);
8
+ __m256i b = __lasx_xvreplgr2vr_w(2);
9
+ __m256i c = __lasx_xvadd_w(a, b);
10
+ int r = __lasx_xvpickve2gr_w(c, 0);
11
+ return r == 3 ? 0 : 1;
12
+ }
@@ -0,0 +1,12 @@
1
+ /* NumKong ISA probe: Power VSX (POWER9+ 128-bit SIMD) */
2
+ #if !defined(__VSX__)
3
+ #error "Feature not available"
4
+ #endif
5
+ #include <altivec.h>
6
+ int main(void) {
7
+ __vector float a = vec_splats(1.0f);
8
+ __vector float b = vec_splats(2.0f);
9
+ __vector float c = vec_madd(a, b, a);
10
+ /* vec_extract requires POWER9+ */
11
+ return vec_extract(c, 0) == 3.0f ? 0 : 1;
12
+ }
@@ -0,0 +1,127 @@
1
+ #!/usr/bin/env node
2
+ /**
3
+ * NumKong ISA probe script for Node.js / node-gyp builds.
4
+ *
5
+ * Try-compiles each probe .c file from probes/ to determine which ISA
6
+ * extensions the current compiler supports. Writes results to
7
+ * nk_probes.h in the package root as #define NK_TARGET_FOO 1/0.
8
+ *
9
+ * Usage: node probes/probe.js
10
+ * Called automatically via package.json "preinstall" hook.
11
+ */
12
+
13
+ const { execSync } = require("child_process");
14
+ const fs = require("fs");
15
+ const path = require("path");
16
+ const os = require("os");
17
+
18
+ const cc = process.env.CC || (process.platform === "win32" ? "cl.exe" : "cc");
19
+ const isWin = process.platform === "win32";
20
+
21
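+ /** Try to compile a probe file into a temporary object file (removed afterwards). Returns true if compilation succeeds, false on any failure (compiler error, missing compiler, or 30 s timeout). */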
22
+ function probeIsa(probeFile, flags) {
23
+ const tmpObj = path.join(
24
+ os.tmpdir(),
25
+ `nk_probe_${path.basename(probeFile, ".c")}${isWin ? ".obj" : ".o"}`,
26
+ );
27
+ try {
28
+ const cmd = isWin
29
+ ? `"${cc}" /c ${flags.join(" ")} "${probeFile}" /Fo"${tmpObj}" /nologo`
30
+ : `"${cc}" -c ${flags.join(" ")} "${probeFile}" -o "${tmpObj}" 2>/dev/null`;
31
+ execSync(cmd, { stdio: "pipe", timeout: 30000 });
32
+ return true;
33
+ } catch {
34
+ return false;
35
+ } finally {
36
+ try {
37
+ fs.unlinkSync(tmpObj);
38
+ } catch { }
39
+ }
40
+ }
41
+
42
+ // Probe table: [define, probeFile, gccFlags, msvcFlags]
43
+ // x86 probes: GCC flags are minimal — each implies its prerequisites.
44
+ // E.g., -mavx512vnni implies -mavx512f; -mavxvnni implies -mavx2.
45
+ const PROBES = [
46
+ // x86
47
+ ["NK_TARGET_HASWELL", "probes/x86_haswell.c", ["-mavx2", "-mfma", "-mf16c"], ["/arch:AVX2"]],
48
+ ["NK_TARGET_SKYLAKE", "probes/x86_skylake.c", ["-mavx512f", "-mavx512bw", "-mavx512dq", "-mavx512vl"], ["/arch:AVX512"]],
49
+ ["NK_TARGET_ICELAKE", "probes/x86_icelake.c", ["-mavx512vnni", "-mavx512vl"], ["/arch:AVX512"]],
50
+ ["NK_TARGET_GENOA", "probes/x86_genoa.c", ["-mavx512bf16", "-mavx512vl"], ["/arch:AVX512"]],
51
+ ["NK_TARGET_SAPPHIRE", "probes/x86_sapphire.c", ["-mavx512fp16", "-mavx512vl"], ["/arch:AVX512"]],
52
+ ["NK_TARGET_SAPPHIREAMX", "probes/x86_sapphireamx.c", ["-mamx-tile", "-mamx-int8"], ["/arch:AVX512"]],
53
+ ["NK_TARGET_GRANITEAMX", "probes/x86_graniteamx.c", ["-mamx-tile", "-mamx-fp16"], ["/arch:AVX512"]],
54
+ ["NK_TARGET_DIAMOND", "probes/x86_diamond.c", ["-mavx10.2-512"], ["/arch:AVX10.2"]],
55
+ ["NK_TARGET_TURIN", "probes/x86_turin.c", ["-mavx512vp2intersect"], ["/arch:AVX512"]],
56
+ ["NK_TARGET_ALDER", "probes/x86_alder.c", ["-mavxvnni"], ["/arch:AVX2"]],
57
+ ["NK_TARGET_SIERRA", "probes/x86_sierra.c", ["-mavxvnniint8"], ["/arch:AVX2"]],
58
+ // ARM NEON probes — msvcFlags is empty for the base NEON entry because MSVC does not define
59
+ // __ARM_FEATURE_* macros via /arch: flags. For MSVC header-only builds,
60
+ // types.h infers features from __ARM_ARCH level instead.
61
+ ["NK_TARGET_NEON", "probes/arm_neon.c", ["-march=armv8-a+simd"], []], // FEAT_AdvSIMD
62
+ ["NK_TARGET_NEONHALF", "probes/arm_neon_half.c", ["-march=armv8.2-a+simd+fp16"], ["/arch:armv8.2"]], // FEAT_FP16
63
+ ["NK_TARGET_NEONSDOT", "probes/arm_neon_sdot.c", ["-march=armv8.2-a+dotprod"], ["/arch:armv8.4"]], // FEAT_DotProd
64
+ ["NK_TARGET_NEONBFDOT", "probes/arm_neon_bfdot.c", ["-march=armv8.6-a+simd+bf16"], ["/arch:armv8.6"]], // FEAT_BF16
65
+ ["NK_TARGET_NEONFHM", "probes/arm_neon_fhm.c", ["-march=armv8.2-a+simd+fp16+fp16fml"], ["/arch:armv8.4"]], // FEAT_FHM
66
+ // ARM SVE/SME
67
+ ["NK_TARGET_SVE", "probes/arm_sve.c", ["-march=armv8.2-a+sve"], []],
68
+ ["NK_TARGET_SVEHALF", "probes/arm_sve_half.c", ["-march=armv8.2-a+sve+fp16"], []],
69
+ ["NK_TARGET_SVEBFDOT", "probes/arm_sve_bfdot.c", ["-march=armv8.2-a+sve+bf16"], []],
70
+ ["NK_TARGET_SVESDOT", "probes/arm_sve_sdot.c", ["-march=armv8.2-a+sve+dotprod"], []],
71
+ ["NK_TARGET_SVE2", "probes/arm_sve2.c", ["-march=armv8.2-a+sve2"], []],
72
+ ["NK_TARGET_SVE2P1", "probes/arm_sve2p1.c", ["-march=armv8.2-a+sve2p1"], []],
73
+ ["NK_TARGET_NEONFP8", "probes/arm_neonfp8.c", ["-march=armv8-a+simd+fp8dot4"], []],
74
+ ["NK_TARGET_SME", "probes/arm_sme.c", ["-march=armv8-a+sme"], []],
75
+ ["NK_TARGET_SME2", "probes/arm_sme2.c", ["-march=armv8-a+sme2"], []],
76
+ ["NK_TARGET_SME2P1", "probes/arm_sme2p1.c", ["-march=armv8-a+sme2p1"], []],
77
+ ["NK_TARGET_SMEF64", "probes/arm_sme_f64.c", ["-march=armv8-a+sme+sme-f64f64"], []],
78
+ ["NK_TARGET_SMEHALF", "probes/arm_sme_half.c", ["-march=armv8-a+sme+sme-f16f16"], []],
79
+ ["NK_TARGET_SMEBF16", "probes/arm_sme_bf16.c", ["-march=armv8-a+sme2+b16b16"], []],
80
+ ["NK_TARGET_SMEBI32", "probes/arm_sme_bi32.c", ["-march=armv8-a+sme2+sme-i16i32"], []],
81
+ ["NK_TARGET_SMELUT2", "probes/arm_sme_lut2.c", ["-march=armv8-a+sme2+lut"], []],
82
+ ["NK_TARGET_SMEFA64", "probes/arm_sme_fa64.c", ["-march=armv8-a+sme+sme-fa64"], []],
83
+ // RISC-V
84
+ ["NK_TARGET_RVV", "probes/riscv_rvv.c", ["-march=rv64gcv"], []],
85
+ ["NK_TARGET_RVVHALF", "probes/riscv_rvv_half.c", ["-march=rv64gcv_zvfh"], []],
86
+ ["NK_TARGET_RVVBF16", "probes/riscv_rvv_bf16.c", ["-march=rv64gcv_zvfbfwma"], []],
87
+ ["NK_TARGET_RVVBB", "probes/riscv_rvv_bb.c", ["-march=rv64gcv_zvbb"], []],
88
+ // LoongArch
89
+ ["NK_TARGET_LOONGSONASX", "probes/loongarch_lasx.c", ["-mlasx"], []],
90
+ // Power
91
+ ["NK_TARGET_POWERVSX", "probes/power_vsx.c", ["-mcpu=power9", "-mvsx"], []],
92
+ // WASM
93
+ ["NK_TARGET_V128RELAXED", "probes/wasm_v128relaxed.c", ["-mrelaxed-simd"], []],
94
+ ];
95
+
96
+ function main() {
97
+ const pkgRoot = path.join(__dirname, "..");
98
+ fs.mkdirSync(pkgRoot, { recursive: true });
99
+
100
+ const arch = process.arch; // 'x64', 'arm64', etc.
101
+ const lines = [
102
+ "/* Auto-generated by scripts/probe_isa.js — do not edit */",
103
+ `/* Compiler: ${cc}, Platform: ${process.platform}, Arch: ${arch} */`,
104
+ "",
105
+ ];
106
+
107
+ let enabled = 0;
108
+ for (const [define, probeFile, gccFlags, msvcFlags] of PROBES) {
109
+ const flags = isWin ? msvcFlags : gccFlags;
110
+ const supported = probeIsa(path.join(pkgRoot, probeFile), flags);
111
+ lines.push(`#define ${define} ${supported ? 1 : 0}`);
112
+ if (supported) {
113
+ enabled++;
114
+ console.log(`[NumKong] Probe ${define}: supported`);
115
+ }
116
+ }
117
+
118
+ lines.push("");
119
+ const header = lines.join("\n");
120
+ const outPath = path.join(pkgRoot, "nk_probes.h");
121
+ fs.writeFileSync(outPath, header);
122
+ console.log(
123
+ `[NumKong] Wrote ${outPath} (${enabled} ISAs enabled out of ${PROBES.length})`,
124
+ );
125
+ }
126
+
127
+ main();
@@ -0,0 +1,14 @@
1
+ /* NumKong ISA probe: RVV 1.0 (RISC-V Vector Extension) */
2
+ #if !defined(__riscv_v)
3
+ #error "Feature not available"
4
+ #endif
5
+ #include <riscv_vector.h>
6
+ int main(void) {
7
+ size_t vl = __riscv_vsetvl_e32m1(4);
8
+ vfloat32m1_t a = __riscv_vfmv_v_f_f32m1(1.0f, vl);
9
+ vfloat32m1_t b = __riscv_vfmv_v_f_f32m1(2.0f, vl);
10
+ vfloat32m1_t c = __riscv_vfadd_vv_f32m1(a, b, vl);
11
+ vfloat32m1_t sum = __riscv_vfredusum_vs_f32m1_f32m1(c, __riscv_vfmv_v_f_f32m1(0.0f, vl), vl);
12
+ float result = __riscv_vfmv_f_s_f32m1_f32(sum);
13
+ return result > 0.0f ? 0 : 1;
14
+ }
@@ -0,0 +1,15 @@
1
+ /* NumKong ISA probe: RVV Zvbb (basic bit-manipulation) */
2
+ #if !defined(__riscv_zvbb)
3
+ #error "Feature not available"
4
+ #endif
5
+ #include <riscv_vector.h>
6
+ int main(void) {
7
+ size_t vl = __riscv_vsetvl_e8m1(4);
8
+ vuint8m1_t a = __riscv_vmv_v_x_u8m1(0xFF, vl);
9
+ /* vcpop.v — per-element popcount, the key Zvbb instruction */
10
+ vuint8m1_t popcnt = __riscv_vcpop_v_u8m1(a, vl);
11
+ vuint8m1_t sum = __riscv_vredsum_vs_u8m1_u8m1(popcnt, __riscv_vmv_v_x_u8m1(0, 1), vl);
12
+ unsigned char result = __riscv_vmv_x_s_u8m1_u8(sum);
13
+ /* Each lane is 0xFF → popcount 8, sum of 4 lanes = 32 */
14
+ return result == 32 ? 0 : 1;
15
+ }
@@ -0,0 +1,17 @@
1
+ /* NumKong ISA probe: RVV Zvfbfwma (BF16 widening FMA) */
2
+ #if !defined(__riscv_zvfbfwma)
3
+ #error "Feature not available"
4
+ #endif
5
+ #include <riscv_vector.h>
6
+ int main(void) {
7
+ size_t vl = __riscv_vsetvl_e16m1(4);
8
+ vuint16m1_t raw = __riscv_vmv_v_x_u16m1(0x3F80, vl); /* bf16 1.0 */
9
+ vbfloat16m1_t a = __riscv_vreinterpret_v_u16m1_bf16m1(raw);
10
+ size_t vl32 = __riscv_vsetvl_e32m2(4);
11
+ vfloat32m2_t acc = __riscv_vfmv_v_f_f32m2(0.0f, vl32);
12
+ acc = __riscv_vfwmaccbf16_vv_f32m2(acc, a, a, vl);
13
+ vfloat32m1_t sum = __riscv_vfredusum_vs_f32m2_f32m1(acc, __riscv_vfmv_v_f_f32m1(0.0f, __riscv_vsetvl_e32m1(1)),
14
+ vl32);
15
+ float result = __riscv_vfmv_f_s_f32m1_f32(sum);
16
+ return result > 0.0f ? 0 : 1;
17
+ }
@@ -0,0 +1,14 @@
1
+ /* NumKong ISA probe: RVV Zvfh (half-precision vector) */
2
+ #if !defined(__riscv_zvfh)
3
+ #error "Feature not available"
4
+ #endif
5
+ #include <riscv_vector.h>
6
+ int main(void) {
7
+ size_t vl = __riscv_vsetvl_e16m1(4);
8
+ vfloat16m1_t a = __riscv_vfmv_v_f_f16m1((_Float16)1.0f, vl);
9
+ vfloat32m2_t wide = __riscv_vfwcvt_f_f_v_f32m2(a, vl);
10
+ vfloat32m1_t sum = __riscv_vfredusum_vs_f32m2_f32m1(wide, __riscv_vfmv_v_f_f32m1(0.0f, __riscv_vsetvl_e32m1(1)),
11
+ vl);
12
+ float result = __riscv_vfmv_f_s_f32m1_f32(sum);
13
+ return result > 0.0f ? 0 : 1;
14
+ }
@@ -0,0 +1,11 @@
1
+ /* NumKong ISA probe: WASM Relaxed SIMD (v128) */
2
+ #if !defined(__wasm_relaxed_simd__)
3
+ #error "WASM Relaxed SIMD not available"
4
+ #endif
5
+ #include <wasm_simd128.h>
6
+ int main(void) {
7
+ v128_t a = wasm_f32x4_splat(1.0f);
8
+ v128_t b = wasm_f32x4_splat(2.0f);
9
+ v128_t c = wasm_f32x4_relaxed_madd(a, b, a);
10
+ return wasm_f32x4_extract_lane(c, 0) > 0.0f ? 0 : 1;
11
+ }
@@ -0,0 +1,17 @@
1
+ /* NumKong ISA probe: Alder Lake (AVX-VNNI 256-bit) */
2
+ #if defined(__APPLE__)
3
+ #error "AVX-512 not available on macOS"
4
+ #endif
5
+
6
+ #if !defined(__AVXVNNI__)
7
+ #error "Feature not available"
8
+ #endif
9
+ #include <immintrin.h>
10
+ int main(void) {
11
+ volatile int two = 2;
12
+ __m256i acc = _mm256_setzero_si256();
13
+ __m256i a = _mm256_set1_epi8((char)two);
14
+ __m256i b = _mm256_set1_epi8((char)(two + 1));
15
+ acc = _mm256_dpbusd_avx_epi32(acc, a, b);
16
+ return _mm256_extract_epi32(acc, 0) == 24 ? 0 : 1;
17
+ }
@@ -0,0 +1,17 @@
1
+ /* NumKong ISA probe: Diamond Rapids (AVX10.2) */
2
+ #if defined(__APPLE__)
3
+ #error "AVX-512 not available on macOS"
4
+ #endif
5
+
6
+ #if !defined(__AVX512FP16__)
7
+ #error "Feature not available"
8
+ #endif
9
+ #include <immintrin.h>
10
+ int main(void) {
11
+ volatile int one = 1;
12
+ __m256i acc = _mm256_setzero_si256();
13
+ __m256i a = _mm256_set1_epi8((char)one);
14
+ __m256i b = _mm256_set1_epi8((char)one);
15
+ acc = _mm256_dpbe4ss_epi32(acc, a, b);
16
+ return _mm256_extract_epi32(acc, 0) != 0 ? 0 : 1;
17
+ }
@@ -0,0 +1,17 @@
1
+ /* NumKong ISA probe: Genoa (AVX-512F/BW/DQ/VL + BF16) */
2
+ #if defined(__APPLE__)
3
+ #error "AVX-512 not available on macOS"
4
+ #endif
5
+
6
+ #if !defined(__AVX512BF16__)
7
+ #error "Feature not available"
8
+ #endif
9
+ #include <immintrin.h>
10
+ int main(void) {
11
+ volatile float one = 1.0f;
12
+ __m512 f = _mm512_set1_ps(one);
13
+ __m256bh a = _mm512_cvtneps_pbh(f);
14
+ __m512bh wide = (__m512bh)_mm512_castsi512_ps(_mm512_inserti64x4(_mm512_setzero_si512(), (__m256i)a, 0));
15
+ __m512 r = _mm512_dpbf16_ps(_mm512_setzero_ps(), wide, wide);
16
+ return _mm512_reduce_add_ps(r) >= 0.0f ? 0 : 1;
17
+ }
@@ -0,0 +1,19 @@
1
+ /* NumKong ISA probe: Granite Rapids AMX (AMX-TILE + AMX-FP16) */
2
+ #if defined(__APPLE__)
3
+ #error "AVX-512 not available on macOS"
4
+ #endif
5
+
6
+ #if defined(__FreeBSD__)
7
+ #error "AMX not supported on FreeBSD"
8
+ #endif
9
+
10
+ #if !defined(__AMX_FP16__)
11
+ #error "Feature not available"
12
+ #endif
13
+ #include <immintrin.h>
14
+ #include <amxfp16intrin.h>
15
+ int main(void) {
16
+ volatile int zero = 0;
17
+ _tile_release();
18
+ return zero;
19
+ }
@@ -0,0 +1,11 @@
1
+ /* NumKong ISA probe: Haswell (AVX2 + FMA + F16C) */
2
+ #if !defined(__AVX2__)
3
+ #error "Feature not available"
4
+ #endif
5
+ #include <immintrin.h>
6
+ int main(void) {
7
+ volatile int one = 1;
8
+ __m256i a = _mm256_set1_epi32(one);
9
+ __m256i b = _mm256_add_epi32(a, a);
10
+ return _mm256_extract_epi32(b, 0) == 2 ? 0 : 1;
11
+ }
@@ -0,0 +1,17 @@
1
+ /* NumKong ISA probe: Ice Lake (AVX-512F/BW/DQ/VL + VNNI + VBMI + VPOPCNTDQ) */
2
+ #if defined(__APPLE__)
3
+ #error "AVX-512 not available on macOS"
4
+ #endif
5
+
6
+ #if !defined(__AVX512VNNI__)
7
+ #error "Feature not available"
8
+ #endif
9
+ #include <immintrin.h>
10
+ int main(void) {
11
+ volatile int one = 1;
12
+ __m512i acc = _mm512_setzero_si512();
13
+ __m512i a = _mm512_set1_epi8((char)one);
14
+ __m512i b = _mm512_set1_epi8((char)one);
15
+ acc = _mm512_dpbusd_epi32(acc, a, b);
16
+ return (int)_mm512_reduce_add_epi32(acc) == 64 ? 0 : 1;
17
+ }
@@ -0,0 +1,16 @@
1
+ /* NumKong ISA probe: Sapphire Rapids (AVX-512F/BW/DQ/VL + FP16) */
2
+ #if defined(__APPLE__)
3
+ #error "AVX-512 not available on macOS"
4
+ #endif
5
+
6
+ #if !defined(__AVX512FP16__)
7
+ #error "Feature not available"
8
+ #endif
9
+ #include <immintrin.h>
10
+ int main(void) {
11
+ volatile float one = 1.0f;
12
+ __m512h a = _mm512_set1_ph((_Float16)one);
13
+ __m512h b = _mm512_set1_ph((_Float16)(one + one));
14
+ __m512h c = _mm512_fmadd_ph(a, b, a);
15
+ return (int)_mm_extract_epi16(_mm256_castsi256_si128(_mm512_castsi512_si256((__m512i)c)), 0) != 0 ? 0 : 1;
16
+ }
@@ -0,0 +1,18 @@
1
+ /* NumKong ISA probe: Sapphire Rapids AMX (AMX-TILE + AMX-INT8) */
2
+ #if defined(__APPLE__)
3
+ #error "AVX-512 not available on macOS"
4
+ #endif
5
+
6
+ #if defined(__FreeBSD__)
7
+ #error "AMX not supported on FreeBSD"
8
+ #endif
9
+
10
+ #if !defined(__AMX_INT8__)
11
+ #error "Feature not available"
12
+ #endif
13
+ #include <immintrin.h>
14
+ int main(void) {
15
+ volatile int zero = 0;
16
+ _tile_release();
17
+ return zero;
18
+ }
@@ -0,0 +1,17 @@
1
+ /* NumKong ISA probe: Sierra Forest (AVXVNNIINT8) */
2
+ #if defined(__APPLE__)
3
+ #error "AVX-512 not available on macOS"
4
+ #endif
5
+
6
+ #if !defined(__AVXVNNIINT8__)
7
+ #error "Feature not available"
8
+ #endif
9
+ #include <immintrin.h>
10
+ int main(void) {
11
+ volatile int two = 2;
12
+ __m256i acc = _mm256_setzero_si256();
13
+ __m256i a = _mm256_set1_epi8((char)two);
14
+ __m256i b = _mm256_set1_epi8((char)(two + 1));
15
+ acc = _mm256_dpbssd_epi32(acc, a, b);
16
+ return _mm256_extract_epi32(acc, 0) == 24 ? 0 : 1;
17
+ }
@@ -0,0 +1,15 @@
1
+ /* NumKong ISA probe: Skylake (AVX-512F/BW/DQ/VL) */
2
+ #if defined(__APPLE__)
3
+ #error "AVX-512 not available on macOS"
4
+ #endif
5
+
6
+ #if !defined(__AVX512F__) /* NOTE(review): only the AVX-512F macro is gated; BW/DQ/VL from the title are not checked here - confirm that is intended */
7
+ #error "Feature not available"
8
+ #endif
9
+ #include <immintrin.h>
10
+ int main(void) { /* Returns 0 when the 512-bit integer add reduces to 32, 1 otherwise */
11
+ volatile int one = 1; /* volatile read; presumably keeps the operand from being constant-folded */
12
+ __m512i a = _mm512_set1_epi32(one);
13
+ __m512i b = _mm512_add_epi32(a, a); /* every 32-bit lane holds 1 + 1 */
14
+ return (int)_mm512_reduce_add_epi32(b) == 32 ? 0 : 1; /* 16 lanes * 2 = 32 */
15
+ }
@@ -0,0 +1,17 @@
1
+ /* NumKong ISA probe: Turin (AVX-512F + VP2INTERSECT) */
2
+ #if defined(__APPLE__)
3
+ #error "AVX-512 not available on macOS"
4
+ #endif
5
+
6
+ #if !defined(__AVX512VP2INTERSECT__)
7
+ #error "Feature not available"
8
+ #endif
9
+ #include <immintrin.h>
10
+ int main(void) { /* Returns 0 when intersecting two identical vectors yields a non-zero first mask, 1 otherwise */
11
+ volatile int val = 42; /* volatile read; presumably keeps the operands from being constant-folded */
12
+ __m512i a = _mm512_set1_epi32(val);
13
+ __m512i b = _mm512_set1_epi32(val);
14
+ __mmask16 k0, k1; /* both masks are written by the intrinsic; only k0 is read afterwards */
15
+ _mm512_2intersect_epi32(a, b, &k0, &k1);
16
+ return k0 != 0 ? 0 : 1; /* a and b hold the same value in every lane, so a match is expected */
17
+ }
@@ -0,0 +1,2 @@
1
+ /* Minified Emscripten loader for NumKong (presumably build output; confirm before hand-editing). Takes an optional Module overrides object (locateFile, instantiateWasm, wasmBinary, wasmMemory, INITIAL_MEMORY, print, printErr, preRun, postRun, preInit, ...), instantiates "numkong.wasm", copies the _nk_* exports plus _malloc/_free onto Module, and returns Module once the runtime is initialized (otherwise a promise wired to readyPromiseResolve/readyPromiseReject). */ async function NumKongModule(moduleArg={}){var moduleRtn;var Module=moduleArg;var ENVIRONMENT_IS_WEB=!!globalThis.window;var ENVIRONMENT_IS_WORKER=!!globalThis.WorkerGlobalScope;var ENVIRONMENT_IS_NODE=globalThis.process?.versions?.node&&globalThis.process?.type!="renderer";if(ENVIRONMENT_IS_NODE){const{createRequire}=await import("node:module");var require=createRequire(import.meta.url)}var arguments_=[];var thisProgram="./this.program";var quit_=(status,toThrow)=>{throw toThrow};var _scriptName=import.meta.url;var scriptDirectory="";function locateFile(path){if(Module["locateFile"]){return Module["locateFile"](path,scriptDirectory)}return scriptDirectory+path}var readAsync,readBinary;if(ENVIRONMENT_IS_NODE){var fs=require("node:fs");if(_scriptName.startsWith("file:")){scriptDirectory=require("node:path").dirname(require("node:url").fileURLToPath(_scriptName))+"/"}readBinary=filename=>{filename=isFileURI(filename)?new URL(filename):filename;var ret=fs.readFileSync(filename);return ret};readAsync=async(filename,binary=true)=>{filename=isFileURI(filename)?new URL(filename):filename;var ret=fs.readFileSync(filename,binary?undefined:"utf8");return ret};if(process.argv.length>1){thisProgram=process.argv[1].replace(/\\/g,"/")}arguments_=process.argv.slice(2);quit_=(status,toThrow)=>{process.exitCode=status;throw toThrow}}else if(ENVIRONMENT_IS_WEB||ENVIRONMENT_IS_WORKER){try{scriptDirectory=new URL(".",_scriptName).href}catch{}{readAsync=async url=>{var response=await fetch(url,{credentials:"same-origin"});if(response.ok){return response.arrayBuffer()}throw new Error(response.status+" : "+response.url)}}}else{}var out=console.log.bind(console);var err=console.error.bind(console);var wasmBinary;var ABORT=false;var isFileURI=filename=>filename.startsWith("file://");class EmscriptenEH{}class EmscriptenSjLj extends EmscriptenEH{}var readyPromiseResolve,readyPromiseReject;var runtimeInitialized=false;function updateMemoryViews(){var b=wasmMemory.buffer;HEAP8=new 
Int8Array(b);HEAP16=new Int16Array(b);HEAPU8=new Uint8Array(b);HEAPU16=new Uint16Array(b);HEAP32=new Int32Array(b);HEAPU32=new Uint32Array(b);HEAPF32=new Float32Array(b);HEAPF64=new Float64Array(b);HEAP64=new BigInt64Array(b);HEAPU64=new BigUint64Array(b)}function initMemory(){if(Module["wasmMemory"]){wasmMemory=Module["wasmMemory"]}else{var INITIAL_MEMORY=Module["INITIAL_MEMORY"]||16777216;wasmMemory=new WebAssembly.Memory({initial:INITIAL_MEMORY/65536,maximum:32768})}updateMemoryViews()}function preRun(){if(Module["preRun"]){if(typeof Module["preRun"]=="function")Module["preRun"]=[Module["preRun"]];while(Module["preRun"].length){addOnPreRun(Module["preRun"].shift())}}callRuntimeCallbacks(onPreRuns)}function initRuntime(){runtimeInitialized=true;wasmExports["d"]()}function postRun(){if(Module["postRun"]){if(typeof Module["postRun"]=="function")Module["postRun"]=[Module["postRun"]];while(Module["postRun"].length){addOnPostRun(Module["postRun"].shift())}}callRuntimeCallbacks(onPostRuns)}function abort(what){Module["onAbort"]?.(what);what=`Aborted(${what})`;err(what);ABORT=true;what+=". 
Build with -sASSERTIONS for more info.";var e=new WebAssembly.RuntimeError(what);readyPromiseReject?.(e);throw e}var wasmBinaryFile;function findWasmBinary(){if(Module["locateFile"]){return locateFile("numkong.wasm")}return new URL("numkong.wasm",import.meta.url).href}function getBinarySync(file){if(file==wasmBinaryFile&&wasmBinary){return new Uint8Array(wasmBinary)}if(readBinary){return readBinary(file)}throw"both async and sync fetching of the wasm failed"}async function getWasmBinary(binaryFile){if(!wasmBinary){try{var response=await readAsync(binaryFile);return new Uint8Array(response)}catch{}}return getBinarySync(binaryFile)}async function instantiateArrayBuffer(binaryFile,imports){try{var binary=await getWasmBinary(binaryFile);var instance=await WebAssembly.instantiate(binary,imports);return instance}catch(reason){err(`failed to asynchronously prepare wasm: ${reason}`);abort(reason)}}async function instantiateAsync(binary,binaryFile,imports){if(!binary&&!ENVIRONMENT_IS_NODE){try{var response=fetch(binaryFile,{credentials:"same-origin"});var instantiationResult=await WebAssembly.instantiateStreaming(response,imports);return instantiationResult}catch(reason){err(`wasm streaming compile failed: ${reason}`);err("falling back to ArrayBuffer instantiation")}}return instantiateArrayBuffer(binaryFile,imports)}function getWasmImports(){var imports={a:wasmImports};return imports}async function createWasm(){function receiveInstance(instance,module){wasmExports=instance.exports;assignWasmExports(wasmExports);return wasmExports}function receiveInstantiationResult(result){return receiveInstance(result["instance"])}var info=getWasmImports();if(Module["instantiateWasm"]){return new Promise((resolve,reject)=>{Module["instantiateWasm"](info,(inst,mod)=>{resolve(receiveInstance(inst,mod))})})}wasmBinaryFile??=findWasmBinary();var result=await instantiateAsync(wasmBinary,wasmBinaryFile,info);var exports=receiveInstantiationResult(result);return exports}class 
ExitStatus{name="ExitStatus";constructor(status){this.message=`Program terminated with exit(${status})`;this.status=status}}var HEAP16;var HEAP32;var HEAP64;var HEAP8;var HEAPF32;var HEAPF64;var HEAPU16;var HEAPU32;var HEAPU64;var HEAPU8;var callRuntimeCallbacks=callbacks=>{while(callbacks.length>0){callbacks.shift()(Module)}};var onPostRuns=[];var addOnPostRun=cb=>onPostRuns.push(cb);var onPreRuns=[];var addOnPreRun=cb=>onPreRuns.push(cb);var noExitRuntime=true;var wasmMemory;var getHeapMax=()=>2147483648;var alignMemory=(size,alignment)=>Math.ceil(size/alignment)*alignment;var growMemory=size=>{var oldHeapSize=wasmMemory.buffer.byteLength;var pages=(size-oldHeapSize+65535)/65536|0;try{wasmMemory.grow(pages);updateMemoryViews();return 1}catch(e){}};var _emscripten_resize_heap=requestedSize=>{var oldSize=HEAPU8.length;requestedSize>>>=0;var maxHeapSize=getHeapMax();if(requestedSize>maxHeapSize){return false}for(var cutDown=1;cutDown<=4;cutDown*=2){var overGrownHeapSize=oldSize*(1+.2/cutDown);overGrownHeapSize=Math.min(overGrownHeapSize,requestedSize+100663296);var newSize=Math.min(maxHeapSize,alignMemory(Math.max(requestedSize,overGrownHeapSize),65536));var replacement=growMemory(newSize);if(replacement){return true}}return false};{initMemory();if(Module["noExitRuntime"])noExitRuntime=Module["noExitRuntime"];if(Module["print"])out=Module["print"];if(Module["printErr"])err=Module["printErr"];if(Module["wasmBinary"])wasmBinary=Module["wasmBinary"];if(Module["arguments"])arguments_=Module["arguments"];if(Module["thisProgram"])thisProgram=Module["thisProgram"];if(Module["preInit"]){if(typeof Module["preInit"]=="function")Module["preInit"]=[Module["preInit"]];while(Module["preInit"].length>0){Module["preInit"].shift()()}}}Module["wasmMemory"]=wasmMemory;/* Feature probe handed to the wasm module as import "c": asks WebAssembly.validate about a tiny embedded module whose single function takes three v128 values, applies one 0xFD-prefixed opcode and returns a v128. Yields 1 when validation succeeds, 0 when it fails or throws. NOTE(review): confirm the opcode bytes 253,175,1 are the encoding this probe is meant to detect. */function nk_has_relaxed(){var test=new Uint8Array([0,97,115,109,1,0,0,0,1,8,1,96,3,123,123,123,1,123,3,2,1,0,10,9,1,7,0,32,0,32,1,32,2,253,175,1,11]);try{return WebAssembly.validate(test)?1:0}catch(e){return 0}}var 
_nk_dot_f64,_nk_dot_f32,_nk_dot_bf16,_nk_dot_f16,_nk_dot_i8,_nk_dot_u8,_nk_angular_f64,_nk_angular_f32,_nk_angular_bf16,_nk_angular_f16,_nk_angular_i8,_nk_angular_u8,_nk_euclidean_f64,_nk_euclidean_f32,_nk_euclidean_bf16,_nk_euclidean_f16,_nk_euclidean_i8,_nk_euclidean_u8,_nk_sqeuclidean_f64,_nk_sqeuclidean_f32,_nk_sqeuclidean_bf16,_nk_sqeuclidean_f16,_nk_sqeuclidean_i8,_nk_sqeuclidean_u8,_nk_hamming_u8,_nk_hamming_u1,_nk_jaccard_u16,_nk_jaccard_u1,_nk_kld_f64,_nk_kld_f32,_nk_jsd_f64,_nk_jsd_f32,_nk_dots_packed_size_f64,_nk_dots_packed_size_f32,_nk_dots_packed_size_bf16,_nk_dots_packed_size_f16,_nk_dots_packed_size_i8,_nk_dots_packed_size_u8,_nk_dots_pack_f64,_nk_dots_pack_f32,_nk_dots_pack_bf16,_nk_dots_pack_f16,_nk_dots_pack_i8,_nk_dots_pack_u8,_nk_dots_packed_f64,_nk_dots_packed_f32,_nk_dots_packed_bf16,_nk_dots_packed_f16,_nk_dots_packed_i8,_nk_dots_packed_u8,_nk_dots_symmetric_f64,_nk_dots_symmetric_f32,_nk_dots_symmetric_bf16,_nk_dots_symmetric_f16,_nk_dots_symmetric_i8,_nk_dots_symmetric_u8,_nk_angulars_packed_f64,_nk_angulars_packed_f32,_nk_angulars_packed_bf16,_nk_angulars_packed_f16,_nk_angulars_symmetric_f64,_nk_angulars_symmetric_f32,_nk_angulars_symmetric_bf16,_nk_angulars_symmetric_f16,_nk_euclideans_packed_f64,_nk_euclideans_packed_f32,_nk_euclideans_packed_bf16,_nk_euclideans_packed_f16,_nk_euclideans_symmetric_f64,_nk_euclideans_symmetric_f32,_nk_euclideans_symmetric_bf16,_nk_euclideans_symmetric_f16,_nk_capabilities,_malloc,_free,__indirect_function_table;function 
assignWasmExports(wasmExports){_nk_dot_f64=Module["_nk_dot_f64"]=wasmExports["e"];_nk_dot_f32=Module["_nk_dot_f32"]=wasmExports["f"];_nk_dot_bf16=Module["_nk_dot_bf16"]=wasmExports["g"];_nk_dot_f16=Module["_nk_dot_f16"]=wasmExports["h"];_nk_dot_i8=Module["_nk_dot_i8"]=wasmExports["i"];_nk_dot_u8=Module["_nk_dot_u8"]=wasmExports["j"];_nk_angular_f64=Module["_nk_angular_f64"]=wasmExports["k"];_nk_angular_f32=Module["_nk_angular_f32"]=wasmExports["l"];_nk_angular_bf16=Module["_nk_angular_bf16"]=wasmExports["m"];_nk_angular_f16=Module["_nk_angular_f16"]=wasmExports["n"];_nk_angular_i8=Module["_nk_angular_i8"]=wasmExports["o"];_nk_angular_u8=Module["_nk_angular_u8"]=wasmExports["p"];_nk_euclidean_f64=Module["_nk_euclidean_f64"]=wasmExports["q"];_nk_euclidean_f32=Module["_nk_euclidean_f32"]=wasmExports["r"];_nk_euclidean_bf16=Module["_nk_euclidean_bf16"]=wasmExports["s"];_nk_euclidean_f16=Module["_nk_euclidean_f16"]=wasmExports["t"];_nk_euclidean_i8=Module["_nk_euclidean_i8"]=wasmExports["u"];_nk_euclidean_u8=Module["_nk_euclidean_u8"]=wasmExports["v"];_nk_sqeuclidean_f64=Module["_nk_sqeuclidean_f64"]=wasmExports["w"];_nk_sqeuclidean_f32=Module["_nk_sqeuclidean_f32"]=wasmExports["x"];_nk_sqeuclidean_bf16=Module["_nk_sqeuclidean_bf16"]=wasmExports["y"];_nk_sqeuclidean_f16=Module["_nk_sqeuclidean_f16"]=wasmExports["z"];_nk_sqeuclidean_i8=Module["_nk_sqeuclidean_i8"]=wasmExports["A"];_nk_sqeuclidean_u8=Module["_nk_sqeuclidean_u8"]=wasmExports["B"];_nk_hamming_u8=Module["_nk_hamming_u8"]=wasmExports["C"];_nk_hamming_u1=Module["_nk_hamming_u1"]=wasmExports["D"];_nk_jaccard_u16=Module["_nk_jaccard_u16"]=wasmExports["E"];_nk_jaccard_u1=Module["_nk_jaccard_u1"]=wasmExports["F"];_nk_kld_f64=Module["_nk_kld_f64"]=wasmExports["G"];_nk_kld_f32=Module["_nk_kld_f32"]=wasmExports["H"];_nk_jsd_f64=Module["_nk_jsd_f64"]=wasmExports["I"];_nk_jsd_f32=Module["_nk_jsd_f32"]=wasmExports["J"];_nk_dots_packed_size_f64=Module["_nk_dots_packed_size_f64"]=wasmExports["K"];_nk_dots_packed_size_f32=M
odule["_nk_dots_packed_size_f32"]=wasmExports["L"];_nk_dots_packed_size_bf16=Module["_nk_dots_packed_size_bf16"]=wasmExports["M"];_nk_dots_packed_size_f16=Module["_nk_dots_packed_size_f16"]=wasmExports["N"];_nk_dots_packed_size_i8=Module["_nk_dots_packed_size_i8"]=wasmExports["O"];_nk_dots_packed_size_u8=Module["_nk_dots_packed_size_u8"]=wasmExports["P"];_nk_dots_pack_f64=Module["_nk_dots_pack_f64"]=wasmExports["Q"];_nk_dots_pack_f32=Module["_nk_dots_pack_f32"]=wasmExports["R"];_nk_dots_pack_bf16=Module["_nk_dots_pack_bf16"]=wasmExports["S"];_nk_dots_pack_f16=Module["_nk_dots_pack_f16"]=wasmExports["T"];_nk_dots_pack_i8=Module["_nk_dots_pack_i8"]=wasmExports["U"];_nk_dots_pack_u8=Module["_nk_dots_pack_u8"]=wasmExports["V"];_nk_dots_packed_f64=Module["_nk_dots_packed_f64"]=wasmExports["W"];_nk_dots_packed_f32=Module["_nk_dots_packed_f32"]=wasmExports["X"];_nk_dots_packed_bf16=Module["_nk_dots_packed_bf16"]=wasmExports["Y"];_nk_dots_packed_f16=Module["_nk_dots_packed_f16"]=wasmExports["Z"];_nk_dots_packed_i8=Module["_nk_dots_packed_i8"]=wasmExports["_"];_nk_dots_packed_u8=Module["_nk_dots_packed_u8"]=wasmExports["$"];_nk_dots_symmetric_f64=Module["_nk_dots_symmetric_f64"]=wasmExports["aa"];_nk_dots_symmetric_f32=Module["_nk_dots_symmetric_f32"]=wasmExports["ba"];_nk_dots_symmetric_bf16=Module["_nk_dots_symmetric_bf16"]=wasmExports["ca"];_nk_dots_symmetric_f16=Module["_nk_dots_symmetric_f16"]=wasmExports["da"];_nk_dots_symmetric_i8=Module["_nk_dots_symmetric_i8"]=wasmExports["ea"];_nk_dots_symmetric_u8=Module["_nk_dots_symmetric_u8"]=wasmExports["fa"];_nk_angulars_packed_f64=Module["_nk_angulars_packed_f64"]=wasmExports["ga"];_nk_angulars_packed_f32=Module["_nk_angulars_packed_f32"]=wasmExports["ha"];_nk_angulars_packed_bf16=Module["_nk_angulars_packed_bf16"]=wasmExports["ia"];_nk_angulars_packed_f16=Module["_nk_angulars_packed_f16"]=wasmExports["ja"];_nk_angulars_symmetric_f64=Module["_nk_angulars_symmetric_f64"]=wasmExports["ka"];_nk_angulars_symmetric_f32=Module["_n
k_angulars_symmetric_f32"]=wasmExports["la"];_nk_angulars_symmetric_bf16=Module["_nk_angulars_symmetric_bf16"]=wasmExports["ma"];_nk_angulars_symmetric_f16=Module["_nk_angulars_symmetric_f16"]=wasmExports["na"];_nk_euclideans_packed_f64=Module["_nk_euclideans_packed_f64"]=wasmExports["oa"];_nk_euclideans_packed_f32=Module["_nk_euclideans_packed_f32"]=wasmExports["pa"];_nk_euclideans_packed_bf16=Module["_nk_euclideans_packed_bf16"]=wasmExports["qa"];_nk_euclideans_packed_f16=Module["_nk_euclideans_packed_f16"]=wasmExports["ra"];_nk_euclideans_symmetric_f64=Module["_nk_euclideans_symmetric_f64"]=wasmExports["sa"];_nk_euclideans_symmetric_f32=Module["_nk_euclideans_symmetric_f32"]=wasmExports["ta"];_nk_euclideans_symmetric_bf16=Module["_nk_euclideans_symmetric_bf16"]=wasmExports["ua"];_nk_euclideans_symmetric_f16=Module["_nk_euclideans_symmetric_f16"]=wasmExports["va"];_nk_capabilities=Module["_nk_capabilities"]=wasmExports["wa"];_malloc=Module["_malloc"]=wasmExports["xa"];_free=Module["_free"]=wasmExports["ya"];__indirect_function_table=wasmExports["__indirect_function_table"]}var wasmImports={b:_emscripten_resize_heap,a:wasmMemory,c:nk_has_relaxed};function run(){preRun();function doRun(){Module["calledRun"]=true;if(ABORT)return;initRuntime();readyPromiseResolve?.(Module);Module["onRuntimeInitialized"]?.();postRun()}if(Module["setStatus"]){Module["setStatus"]("Running...");setTimeout(()=>{setTimeout(()=>Module["setStatus"](""),1);doRun()},1)}else{doRun()}}var wasmExports;wasmExports=await (createWasm());run();if(runtimeInitialized){moduleRtn=Module}else{moduleRtn=new Promise((resolve,reject)=>{readyPromiseResolve=resolve;readyPromiseReject=reject})}
2
+ ;return moduleRtn}export default NumKongModule;
@@ -0,0 +1,14 @@
1
+ /**
2
+ * @brief Self-contained browser ESM entry point for NumKong WASM.
3
+ * @file javascript/numkong-browser.ts
4
+ *
5
+ * Auto-initializes the Emscripten module on import via top-level await.
6
+ * The Emscripten glue (`numkong-emscripten.js`) and binary (`numkong.wasm`)
7
+ * must be co-located with this file (same directory or CDN prefix).
8
+ *
9
+ * Usage:
10
+ * import { dot, euclidean } from './numkong.js';
11
+ * console.log(dot(new Float32Array([1,2,3]), new Float32Array([4,5,6])));
12
+ */
13
+ /* Names re-exported unchanged from './types.js'. */ export { TensorBase, VectorBase, VectorView, Vector, MatrixBase, Matrix, PackedMatrix, DType, TypedArray, KernelFamily, dtypeToString, outputDtype, Float16Array, BFloat16Array, E4M3Array, E5M2Array, BinaryArray, isFloat16Array, isBFloat16Array, isE4M3Array, isE5M2Array, isBinaryArray, } from './types.js';
14
+ /* Names re-exported unchanged from './numkong-wasm.js'. */ export { dot, inner, euclidean, sqeuclidean, angular, hamming, jaccard, kullbackleibler, jensenshannon, getCapabilities, hasCapability, dotsPack, dotsPackedSize, dotsPacked, angularsPacked, euclideansPacked, dotsSymmetric, angularsSymmetric, euclideansSymmetric, } from './numkong-wasm.js';