numkong 7.0.0 → 7.4.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (315) hide show
  1. package/README.md +197 -124
  2. package/binding.gyp +34 -484
  3. package/c/dispatch_bf16.c +59 -1
  4. package/c/dispatch_e2m3.c +41 -8
  5. package/c/dispatch_e3m2.c +49 -8
  6. package/c/dispatch_e4m3.c +51 -9
  7. package/c/dispatch_e5m2.c +45 -1
  8. package/c/dispatch_f16.c +79 -26
  9. package/c/dispatch_f16c.c +5 -5
  10. package/c/dispatch_f32.c +56 -0
  11. package/c/dispatch_f64.c +52 -0
  12. package/c/dispatch_i4.c +3 -0
  13. package/c/dispatch_i8.c +62 -3
  14. package/c/dispatch_other.c +18 -0
  15. package/c/dispatch_u1.c +54 -9
  16. package/c/dispatch_u4.c +3 -0
  17. package/c/dispatch_u8.c +64 -3
  18. package/c/numkong.c +3 -0
  19. package/include/README.md +79 -9
  20. package/include/numkong/attention/sapphireamx.h +278 -276
  21. package/include/numkong/attention/sme.h +983 -977
  22. package/include/numkong/attention.h +1 -1
  23. package/include/numkong/capabilities.h +289 -94
  24. package/include/numkong/cast/README.md +40 -40
  25. package/include/numkong/cast/diamond.h +64 -0
  26. package/include/numkong/cast/haswell.h +42 -194
  27. package/include/numkong/cast/icelake.h +42 -37
  28. package/include/numkong/cast/loongsonasx.h +252 -0
  29. package/include/numkong/cast/neon.h +216 -249
  30. package/include/numkong/cast/powervsx.h +449 -0
  31. package/include/numkong/cast/rvv.h +223 -274
  32. package/include/numkong/cast/sapphire.h +18 -18
  33. package/include/numkong/cast/serial.h +1018 -944
  34. package/include/numkong/cast/skylake.h +82 -23
  35. package/include/numkong/cast/v128relaxed.h +462 -105
  36. package/include/numkong/cast.h +24 -0
  37. package/include/numkong/cast.hpp +44 -0
  38. package/include/numkong/curved/README.md +17 -17
  39. package/include/numkong/curved/neon.h +131 -7
  40. package/include/numkong/curved/neonbfdot.h +6 -7
  41. package/include/numkong/curved/rvv.h +26 -26
  42. package/include/numkong/curved/smef64.h +186 -182
  43. package/include/numkong/curved.h +14 -18
  44. package/include/numkong/dot/README.md +154 -137
  45. package/include/numkong/dot/alder.h +43 -43
  46. package/include/numkong/dot/diamond.h +158 -0
  47. package/include/numkong/dot/genoa.h +4 -30
  48. package/include/numkong/dot/haswell.h +215 -180
  49. package/include/numkong/dot/icelake.h +190 -76
  50. package/include/numkong/dot/loongsonasx.h +671 -0
  51. package/include/numkong/dot/neon.h +124 -73
  52. package/include/numkong/dot/neonbfdot.h +11 -12
  53. package/include/numkong/dot/neonfhm.h +44 -46
  54. package/include/numkong/dot/neonfp8.h +323 -0
  55. package/include/numkong/dot/neonsdot.h +190 -76
  56. package/include/numkong/dot/powervsx.h +752 -0
  57. package/include/numkong/dot/rvv.h +92 -84
  58. package/include/numkong/dot/rvvbf16.h +12 -12
  59. package/include/numkong/dot/rvvhalf.h +12 -12
  60. package/include/numkong/dot/sapphire.h +4 -4
  61. package/include/numkong/dot/serial.h +66 -30
  62. package/include/numkong/dot/sierra.h +31 -31
  63. package/include/numkong/dot/skylake.h +142 -110
  64. package/include/numkong/dot/sve.h +217 -177
  65. package/include/numkong/dot/svebfdot.h +10 -10
  66. package/include/numkong/dot/svehalf.h +85 -41
  67. package/include/numkong/dot/svesdot.h +89 -0
  68. package/include/numkong/dot/v128relaxed.h +124 -89
  69. package/include/numkong/dot.h +114 -48
  70. package/include/numkong/dots/README.md +203 -203
  71. package/include/numkong/dots/alder.h +12 -9
  72. package/include/numkong/dots/diamond.h +86 -0
  73. package/include/numkong/dots/genoa.h +10 -4
  74. package/include/numkong/dots/haswell.h +63 -48
  75. package/include/numkong/dots/icelake.h +27 -18
  76. package/include/numkong/dots/loongsonasx.h +176 -0
  77. package/include/numkong/dots/neon.h +14 -11
  78. package/include/numkong/dots/neonbfdot.h +4 -3
  79. package/include/numkong/dots/neonfhm.h +11 -9
  80. package/include/numkong/dots/neonfp8.h +99 -0
  81. package/include/numkong/dots/neonsdot.h +48 -12
  82. package/include/numkong/dots/powervsx.h +194 -0
  83. package/include/numkong/dots/rvv.h +451 -344
  84. package/include/numkong/dots/sapphireamx.h +1028 -984
  85. package/include/numkong/dots/serial.h +213 -197
  86. package/include/numkong/dots/sierra.h +10 -7
  87. package/include/numkong/dots/skylake.h +47 -36
  88. package/include/numkong/dots/sme.h +2001 -2364
  89. package/include/numkong/dots/smebi32.h +175 -162
  90. package/include/numkong/dots/smef64.h +328 -323
  91. package/include/numkong/dots/v128relaxed.h +64 -41
  92. package/include/numkong/dots.h +573 -293
  93. package/include/numkong/dots.hpp +45 -43
  94. package/include/numkong/each/README.md +133 -137
  95. package/include/numkong/each/haswell.h +6 -6
  96. package/include/numkong/each/icelake.h +7 -7
  97. package/include/numkong/each/neon.h +76 -42
  98. package/include/numkong/each/neonbfdot.h +11 -12
  99. package/include/numkong/each/neonhalf.h +24 -116
  100. package/include/numkong/each/rvv.h +28 -28
  101. package/include/numkong/each/sapphire.h +27 -161
  102. package/include/numkong/each/serial.h +6 -6
  103. package/include/numkong/each/skylake.h +7 -7
  104. package/include/numkong/each/v128relaxed.h +562 -0
  105. package/include/numkong/each.h +148 -62
  106. package/include/numkong/each.hpp +2 -2
  107. package/include/numkong/geospatial/README.md +18 -18
  108. package/include/numkong/geospatial/haswell.h +365 -325
  109. package/include/numkong/geospatial/neon.h +350 -306
  110. package/include/numkong/geospatial/rvv.h +4 -4
  111. package/include/numkong/geospatial/skylake.h +376 -340
  112. package/include/numkong/geospatial/v128relaxed.h +366 -327
  113. package/include/numkong/geospatial.h +17 -17
  114. package/include/numkong/matrix.hpp +4 -4
  115. package/include/numkong/maxsim/README.md +14 -14
  116. package/include/numkong/maxsim/alder.h +6 -6
  117. package/include/numkong/maxsim/genoa.h +4 -4
  118. package/include/numkong/maxsim/haswell.h +6 -6
  119. package/include/numkong/maxsim/icelake.h +18 -18
  120. package/include/numkong/maxsim/neonsdot.h +21 -21
  121. package/include/numkong/maxsim/sapphireamx.h +14 -14
  122. package/include/numkong/maxsim/serial.h +6 -6
  123. package/include/numkong/maxsim/sme.h +221 -196
  124. package/include/numkong/maxsim/v128relaxed.h +6 -6
  125. package/include/numkong/mesh/README.md +62 -56
  126. package/include/numkong/mesh/haswell.h +339 -464
  127. package/include/numkong/mesh/neon.h +1100 -519
  128. package/include/numkong/mesh/neonbfdot.h +36 -68
  129. package/include/numkong/mesh/rvv.h +530 -435
  130. package/include/numkong/mesh/serial.h +75 -91
  131. package/include/numkong/mesh/skylake.h +1627 -302
  132. package/include/numkong/mesh/v128relaxed.h +443 -330
  133. package/include/numkong/mesh.h +63 -49
  134. package/include/numkong/mesh.hpp +4 -4
  135. package/include/numkong/numkong.h +3 -3
  136. package/include/numkong/numkong.hpp +1 -0
  137. package/include/numkong/probability/README.md +23 -19
  138. package/include/numkong/probability/neon.h +82 -52
  139. package/include/numkong/probability/rvv.h +28 -23
  140. package/include/numkong/probability/serial.h +51 -39
  141. package/include/numkong/probability.h +20 -23
  142. package/include/numkong/random.h +1 -1
  143. package/include/numkong/reduce/README.md +143 -138
  144. package/include/numkong/reduce/alder.h +81 -77
  145. package/include/numkong/reduce/haswell.h +222 -220
  146. package/include/numkong/reduce/neon.h +629 -519
  147. package/include/numkong/reduce/neonbfdot.h +7 -218
  148. package/include/numkong/reduce/neonfhm.h +9 -381
  149. package/include/numkong/reduce/neonsdot.h +9 -9
  150. package/include/numkong/reduce/rvv.h +928 -802
  151. package/include/numkong/reduce/serial.h +23 -27
  152. package/include/numkong/reduce/sierra.h +20 -20
  153. package/include/numkong/reduce/skylake.h +326 -324
  154. package/include/numkong/reduce/v128relaxed.h +52 -52
  155. package/include/numkong/reduce.h +4 -23
  156. package/include/numkong/reduce.hpp +156 -11
  157. package/include/numkong/scalar/README.md +6 -6
  158. package/include/numkong/scalar/haswell.h +26 -17
  159. package/include/numkong/scalar/loongsonasx.h +74 -0
  160. package/include/numkong/scalar/neon.h +9 -9
  161. package/include/numkong/scalar/powervsx.h +96 -0
  162. package/include/numkong/scalar/rvv.h +2 -2
  163. package/include/numkong/scalar/sapphire.h +21 -10
  164. package/include/numkong/scalar/serial.h +21 -21
  165. package/include/numkong/scalar.h +13 -0
  166. package/include/numkong/set/README.md +28 -28
  167. package/include/numkong/set/haswell.h +12 -12
  168. package/include/numkong/set/icelake.h +14 -14
  169. package/include/numkong/set/loongsonasx.h +181 -0
  170. package/include/numkong/set/neon.h +17 -18
  171. package/include/numkong/set/powervsx.h +326 -0
  172. package/include/numkong/set/rvv.h +4 -4
  173. package/include/numkong/set/serial.h +6 -6
  174. package/include/numkong/set/sve.h +60 -59
  175. package/include/numkong/set/v128relaxed.h +6 -6
  176. package/include/numkong/set.h +21 -7
  177. package/include/numkong/sets/README.md +26 -26
  178. package/include/numkong/sets/loongsonasx.h +52 -0
  179. package/include/numkong/sets/powervsx.h +65 -0
  180. package/include/numkong/sets/smebi32.h +395 -364
  181. package/include/numkong/sets.h +83 -40
  182. package/include/numkong/sparse/README.md +4 -4
  183. package/include/numkong/sparse/icelake.h +101 -101
  184. package/include/numkong/sparse/serial.h +1 -1
  185. package/include/numkong/sparse/sve2.h +137 -141
  186. package/include/numkong/sparse/turin.h +12 -12
  187. package/include/numkong/sparse.h +10 -10
  188. package/include/numkong/spatial/README.md +230 -226
  189. package/include/numkong/spatial/alder.h +113 -116
  190. package/include/numkong/spatial/diamond.h +240 -0
  191. package/include/numkong/spatial/genoa.h +0 -68
  192. package/include/numkong/spatial/haswell.h +74 -55
  193. package/include/numkong/spatial/icelake.h +539 -58
  194. package/include/numkong/spatial/loongsonasx.h +483 -0
  195. package/include/numkong/spatial/neon.h +125 -52
  196. package/include/numkong/spatial/neonbfdot.h +8 -9
  197. package/include/numkong/spatial/neonfp8.h +258 -0
  198. package/include/numkong/spatial/neonsdot.h +180 -12
  199. package/include/numkong/spatial/powervsx.h +738 -0
  200. package/include/numkong/spatial/rvv.h +146 -139
  201. package/include/numkong/spatial/rvvbf16.h +17 -12
  202. package/include/numkong/spatial/rvvhalf.h +13 -10
  203. package/include/numkong/spatial/serial.h +13 -12
  204. package/include/numkong/spatial/sierra.h +232 -39
  205. package/include/numkong/spatial/skylake.h +73 -74
  206. package/include/numkong/spatial/sve.h +93 -72
  207. package/include/numkong/spatial/svebfdot.h +29 -29
  208. package/include/numkong/spatial/svehalf.h +52 -26
  209. package/include/numkong/spatial/svesdot.h +142 -0
  210. package/include/numkong/spatial/v128relaxed.h +293 -41
  211. package/include/numkong/spatial.h +338 -82
  212. package/include/numkong/spatials/README.md +194 -194
  213. package/include/numkong/spatials/diamond.h +82 -0
  214. package/include/numkong/spatials/haswell.h +2 -2
  215. package/include/numkong/spatials/loongsonasx.h +153 -0
  216. package/include/numkong/spatials/neonfp8.h +111 -0
  217. package/include/numkong/spatials/neonsdot.h +34 -0
  218. package/include/numkong/spatials/powervsx.h +153 -0
  219. package/include/numkong/spatials/rvv.h +259 -243
  220. package/include/numkong/spatials/sapphireamx.h +173 -173
  221. package/include/numkong/spatials/serial.h +2 -2
  222. package/include/numkong/spatials/skylake.h +2 -2
  223. package/include/numkong/spatials/sme.h +590 -605
  224. package/include/numkong/spatials/smef64.h +139 -130
  225. package/include/numkong/spatials/v128relaxed.h +2 -2
  226. package/include/numkong/spatials.h +820 -500
  227. package/include/numkong/spatials.hpp +49 -48
  228. package/include/numkong/tensor.hpp +406 -17
  229. package/include/numkong/trigonometry/README.md +19 -19
  230. package/include/numkong/trigonometry/haswell.h +402 -401
  231. package/include/numkong/trigonometry/neon.h +386 -387
  232. package/include/numkong/trigonometry/rvv.h +52 -51
  233. package/include/numkong/trigonometry/serial.h +13 -13
  234. package/include/numkong/trigonometry/skylake.h +373 -369
  235. package/include/numkong/trigonometry/v128relaxed.h +375 -374
  236. package/include/numkong/trigonometry.h +13 -13
  237. package/include/numkong/trigonometry.hpp +2 -2
  238. package/include/numkong/types.h +287 -49
  239. package/include/numkong/types.hpp +436 -12
  240. package/include/numkong/vector.hpp +82 -14
  241. package/javascript/dist/cjs/numkong-wasm.js +6 -12
  242. package/javascript/dist/cjs/numkong.d.ts +7 -1
  243. package/javascript/dist/cjs/numkong.js +37 -11
  244. package/javascript/dist/cjs/types.d.ts +9 -0
  245. package/javascript/dist/cjs/types.js +96 -0
  246. package/javascript/dist/esm/numkong-browser.d.ts +14 -0
  247. package/javascript/dist/esm/numkong-browser.js +23 -0
  248. package/javascript/dist/esm/numkong-wasm.js +6 -12
  249. package/javascript/dist/esm/numkong.d.ts +7 -1
  250. package/javascript/dist/esm/numkong.js +37 -11
  251. package/javascript/dist/esm/types.d.ts +9 -0
  252. package/javascript/dist/esm/types.js +96 -0
  253. package/javascript/node-gyp-build.d.ts +4 -1
  254. package/javascript/numkong-browser.ts +40 -0
  255. package/javascript/numkong-wasm.ts +7 -13
  256. package/javascript/numkong.c +5 -26
  257. package/javascript/numkong.ts +36 -11
  258. package/javascript/tsconfig-base.json +1 -0
  259. package/javascript/tsconfig-cjs.json +6 -1
  260. package/javascript/types.ts +110 -0
  261. package/numkong.gypi +101 -0
  262. package/package.json +34 -13
  263. package/probes/arm_neon.c +8 -0
  264. package/probes/arm_neon_bfdot.c +9 -0
  265. package/probes/arm_neon_fhm.c +9 -0
  266. package/probes/arm_neon_half.c +8 -0
  267. package/probes/arm_neon_sdot.c +9 -0
  268. package/probes/arm_neonfp8.c +9 -0
  269. package/probes/arm_sme.c +16 -0
  270. package/probes/arm_sme2.c +16 -0
  271. package/probes/arm_sme2p1.c +16 -0
  272. package/probes/arm_sme_bf16.c +16 -0
  273. package/probes/arm_sme_bi32.c +16 -0
  274. package/probes/arm_sme_f64.c +16 -0
  275. package/probes/arm_sme_fa64.c +14 -0
  276. package/probes/arm_sme_half.c +16 -0
  277. package/probes/arm_sme_lut2.c +15 -0
  278. package/probes/arm_sve.c +18 -0
  279. package/probes/arm_sve2.c +20 -0
  280. package/probes/arm_sve2p1.c +18 -0
  281. package/probes/arm_sve_bfdot.c +20 -0
  282. package/probes/arm_sve_half.c +18 -0
  283. package/probes/arm_sve_sdot.c +21 -0
  284. package/probes/loongarch_lasx.c +12 -0
  285. package/probes/power_vsx.c +12 -0
  286. package/probes/probe.js +127 -0
  287. package/probes/riscv_rvv.c +14 -0
  288. package/probes/riscv_rvv_bb.c +15 -0
  289. package/probes/riscv_rvv_bf16.c +17 -0
  290. package/probes/riscv_rvv_half.c +14 -0
  291. package/probes/wasm_v128relaxed.c +11 -0
  292. package/probes/x86_alder.c +17 -0
  293. package/probes/x86_diamond.c +17 -0
  294. package/probes/x86_genoa.c +17 -0
  295. package/probes/x86_graniteamx.c +19 -0
  296. package/probes/x86_haswell.c +11 -0
  297. package/probes/x86_icelake.c +17 -0
  298. package/probes/x86_sapphire.c +16 -0
  299. package/probes/x86_sapphireamx.c +18 -0
  300. package/probes/x86_sierra.c +17 -0
  301. package/probes/x86_skylake.c +15 -0
  302. package/probes/x86_turin.c +17 -0
  303. package/wasm/numkong-emscripten.js +2 -0
  304. package/wasm/numkong.d.ts +14 -0
  305. package/wasm/numkong.js +1124 -0
  306. package/wasm/numkong.wasm +0 -0
  307. package/include/numkong/curved/neonhalf.h +0 -212
  308. package/include/numkong/dot/neonhalf.h +0 -198
  309. package/include/numkong/dots/neonhalf.h +0 -57
  310. package/include/numkong/mesh/neonhalf.h +0 -616
  311. package/include/numkong/reduce/neonhalf.h +0 -157
  312. package/include/numkong/spatial/neonhalf.h +0 -118
  313. package/include/numkong/spatial/sapphire.h +0 -343
  314. package/include/numkong/spatials/neonhalf.h +0 -58
  315. package/javascript/README.md +0 -246
@@ -43,7 +43,7 @@ export declare const Capability: {
43
43
  readonly SVE: bigint;
44
44
  readonly SVEHALF: bigint;
45
45
  readonly SVESDOT: bigint;
46
- readonly SIERRA: bigint;
46
+ readonly ALDER: bigint;
47
47
  readonly SVEBFDOT: bigint;
48
48
  readonly SVE2: bigint;
49
49
  readonly V128RELAXED: bigint;
@@ -64,6 +64,12 @@ export declare const Capability: {
64
64
  readonly SMEBF16: bigint;
65
65
  readonly SMELUT2: bigint;
66
66
  readonly RVVBB: bigint;
67
+ readonly SIERRA: bigint;
68
+ readonly SMEBI32: bigint;
69
+ readonly LOONGSONASX: bigint;
70
+ readonly POWERVSX: bigint;
71
+ readonly DIAMOND: bigint;
72
+ readonly NEONFP8: bigint;
67
73
  };
68
74
  export { Float16Array, BFloat16Array, E4M3Array, E5M2Array, BinaryArray, TensorBase, VectorBase, VectorView, Vector, MatrixBase, Matrix, PackedMatrix, outputDtype };
69
75
  /** Convert a single FP16 value (as uint16 bits) to FP32 */
@@ -25,15 +25,27 @@
25
25
  * ```
26
26
  */
27
27
  import build from "node-gyp-build";
28
+ import { createRequire } from "node:module";
28
29
  import * as path from "node:path";
29
30
  import { existsSync } from "node:fs";
30
31
  import { getFileName, getRoot } from "bindings";
31
32
  import { setConversionFunctions, Float16Array, BFloat16Array, E4M3Array, E5M2Array, BinaryArray, TensorBase, VectorBase, VectorView, Vector, MatrixBase, Matrix, PackedMatrix, DType, dtypeToString, outputDtype } from "./types.js";
32
- let compiled;
33
- try {
34
- let builddir = getBuildDir(getDirName());
35
- compiled = build(builddir);
36
- // Initialize conversion functions for types.ts
33
+ function loadNativeAddon() {
34
+ // Tier 1: platform-specific optional dependency (@numkong/<os>-<arch>)
35
+ try {
36
+ const req = createRequire(path.join(getDirName(), "noop.js"));
37
+ return req(`@numkong/${process.platform}-${process.arch}`);
38
+ }
39
+ catch { }
40
+ // Tier 2: node-gyp-build fallback (local dev, unsupported platform, build-from-source)
41
+ try {
42
+ return build(getBuildDir(getDirName()));
43
+ }
44
+ catch { }
45
+ return null;
46
+ }
47
+ let compiled = loadNativeAddon();
48
+ if (compiled) {
37
49
  setConversionFunctions({
38
50
  castF16ToF32: compiled.castF16ToF32,
39
51
  castF32ToF16: compiled.castF32ToF16,
@@ -46,11 +58,10 @@ try {
46
58
  cast: compiled.cast,
47
59
  });
48
60
  }
49
- catch (e) {
50
- // Native addon not available
51
- // For WASM usage, import the Emscripten module directly (see test/test-wasm.mjs)
52
- throw new Error("NumKong native addon not found. Build with `npm run build` or use WASM " +
53
- "by importing the Emscripten module directly. See test/test-wasm.mjs for examples.");
61
+ else {
62
+ throw new Error("NumKong native addon not found. Install with `npm install numkong` (which fetches " +
63
+ "the prebuilt binary), or build from source with `npm run install`. " +
64
+ "For WASM, import from 'numkong/wasm' instead.");
54
65
  }
55
66
  /**
56
67
  * CPU capability bit masks in chronological order (by first commercial silicon).
@@ -70,7 +81,7 @@ export const Capability = {
70
81
  SVE: 1n << 10n, // 2020: ARM SVE
71
82
  SVEHALF: 1n << 11n, // 2020: ARM SVE FP16
72
83
  SVESDOT: 1n << 12n, // 2020: ARM SVE i8 dot
73
- SIERRA: 1n << 13n, // 2021: Intel AVX2+VNNI
84
+ ALDER: 1n << 13n, // 2021: Intel AVX2+VNNI
74
85
  SVEBFDOT: 1n << 14n, // 2021: ARM SVE BF16
75
86
  SVE2: 1n << 15n, // 2022: ARM SVE2
76
87
  V128RELAXED: 1n << 16n, // 2022: WASM Relaxed SIMD
@@ -91,6 +102,12 @@ export const Capability = {
91
102
  SMEBF16: 1n << 31n, // 2025+: ARM SME B16B16
92
103
  SMELUT2: 1n << 32n, // 2025+: ARM SME LUTv2
93
104
  RVVBB: 1n << 33n, // 2025+: RISC-V Zvbb
105
+ SIERRA: 1n << 34n, // 2024: Intel AVXVNNIINT8
106
+ SMEBI32: 1n << 35n, // 2025+: ARM SME BI32I32
107
+ LOONGSONASX: 1n << 36n, // LoongArch LASX 256-bit SIMD
108
+ POWERVSX: 1n << 37n, // Power VSX 128-bit SIMD
109
+ DIAMOND: 1n << 38n, // 2025+: Intel AVX10.2
110
+ NEONFP8: 1n << 39n, // ARM NEON FP8
94
111
  };
95
112
  export { Float16Array, BFloat16Array, E4M3Array, E5M2Array, BinaryArray, TensorBase, VectorBase, VectorView, Vector, MatrixBase, Matrix, PackedMatrix, outputDtype };
96
113
  /** Convert a single FP16 value (as uint16 bits) to FP32 */
@@ -448,5 +465,14 @@ function getDirName() {
448
465
  return __dirname;
449
466
  }
450
467
  catch (e) { }
468
+ // Fall back to cwd, which is typically the project root in dev and CI.
469
+ // This helps runtimes like Deno and Bun where the `bindings` module's
470
+ // V8 stack-trace hack may not resolve correctly.
471
+ try {
472
+ const cwd = process.cwd();
473
+ if (existsSync(path.join(cwd, "build")) || existsSync(path.join(cwd, "prebuilds")))
474
+ return cwd;
475
+ }
476
+ catch (e) { }
451
477
  return getRoot(getFileName());
452
478
  }
@@ -74,6 +74,7 @@ export declare abstract class VectorBase extends TensorBase {
74
74
  */
75
75
  export declare class VectorView extends VectorBase {
76
76
  constructor(buffer: ArrayBuffer, byteOffset: number, length: number, dtype: DType);
77
+ toString(): string;
77
78
  /** @brief Create a VectorView from any TypedArray, inferring or accepting dtype. */
78
79
  static from(arr: TypedArray, dtype?: DType): VectorView;
79
80
  }
@@ -86,6 +87,7 @@ export declare class VectorView extends VectorBase {
86
87
  export declare class Vector extends VectorBase {
87
88
  constructor(length: number, dtype: DType);
88
89
  constructor(buffer: ArrayBuffer, length: number, dtype: DType);
90
+ toString(): string;
89
91
  /** @brief Create an owning Vector by copying data from a TypedArray. */
90
92
  static fromTypedArray(arr: TypedArray, dtype?: DType): Vector;
91
93
  /** @brief Create an owning Vector by copying data from any TensorBase. */
@@ -117,6 +119,7 @@ export declare abstract class MatrixBase extends TensorBase {
117
119
  export declare class Matrix extends MatrixBase {
118
120
  constructor(rows: number, cols: number, dtype: DType);
119
121
  constructor(buffer: ArrayBuffer, byteOffset: number, dtype: DType, rows: number, cols: number, rowStride?: number, colStride?: number);
122
+ toString(): string;
120
123
  static fromTypedArray(array: TypedArray, rows: number, cols: number, dtype?: DType): Matrix;
121
124
  toTypedArray(): TypedArray;
122
125
  row(index: number): VectorView;
@@ -138,6 +141,7 @@ export declare class PackedMatrix {
138
141
  constructor(buffer: ArrayBuffer, width: number, depth: number, dtype: DType, byteLength: number);
139
142
  dispose(): void;
140
143
  get disposed(): boolean;
144
+ toString(): string;
141
145
  }
142
146
  /** @brief Kernel family identifiers for output dtype resolution. */
143
147
  export type KernelFamily = 'dots' | 'angulars' | 'euclideans';
@@ -174,6 +178,7 @@ export declare class Float16Array extends Uint16Array {
174
178
  * @param value f32 value to encode and store
175
179
  */
176
180
  setFloat32(index: number, value: number): void;
181
+ toString(): string;
177
182
  }
178
183
  /**
179
184
  * @brief Brain Float 16 (bf16)
@@ -190,6 +195,7 @@ export declare class BFloat16Array extends Uint16Array {
190
195
  toFloat32Array(): Float32Array;
191
196
  getFloat32(index: number): number;
192
197
  setFloat32(index: number, value: number): void;
198
+ toString(): string;
193
199
  }
194
200
  /**
195
201
  * @brief FP8 E4M3 (4-bit exponent, 3-bit mantissa)
@@ -205,6 +211,7 @@ export declare class E4M3Array extends Uint8Array {
205
211
  toFloat32Array(): Float32Array;
206
212
  getFloat32(index: number): number;
207
213
  setFloat32(index: number, value: number): void;
214
+ toString(): string;
208
215
  }
209
216
  /**
210
217
  * @brief FP8 E5M2 (5-bit exponent, 2-bit mantissa)
@@ -220,6 +227,7 @@ export declare class E5M2Array extends Uint8Array {
220
227
  toFloat32Array(): Float32Array;
221
228
  getFloat32(index: number): number;
222
229
  setFloat32(index: number, value: number): void;
230
+ toString(): string;
223
231
  }
224
232
  /**
225
233
  * @brief Binary Array (u1) - Bit-packed binary vectors
@@ -260,6 +268,7 @@ export declare class BinaryArray extends Uint8Array {
260
268
  * @returns Binary array with quantized values
261
269
  */
262
270
  static fromFloat64Array(vector: Float64Array): BinaryArray;
271
+ toString(): string;
263
272
  }
264
273
  /**
265
274
  * @brief Type guard to check if an object is a Float16Array.
@@ -102,6 +102,12 @@ export class VectorView extends VectorBase {
102
102
  constructor(buffer, byteOffset, length, dtype) {
103
103
  super(buffer, byteOffset, length, dtype);
104
104
  }
105
+ toString() {
106
+ return `VectorView(${this.length}, ${dtypeToString(this.dtype)})`;
107
+ }
108
+ [Symbol.for('nodejs.util.inspect.custom')]() {
109
+ return this.toString();
110
+ }
105
111
  /** @brief Create a VectorView from any TypedArray, inferring or accepting dtype. */
106
112
  static from(arr, dtype) {
107
113
  const d = dtype ?? inferDtype(arr);
@@ -143,6 +149,12 @@ export class Vector extends VectorBase {
143
149
  super(lengthOrBuffer, 0, dtypeOrLength, dtype);
144
150
  }
145
151
  }
152
+ toString() {
153
+ return `Vector(${this.length}, ${dtypeToString(this.dtype)})`;
154
+ }
155
+ [Symbol.for('nodejs.util.inspect.custom')]() {
156
+ return this.toString();
157
+ }
146
158
  /** @brief Create an owning Vector by copying data from a TypedArray. */
147
159
  static fromTypedArray(arr, dtype) {
148
160
  const d = dtype ?? inferDtype(arr);
@@ -238,6 +250,12 @@ export class Matrix extends MatrixBase {
238
250
  super(rowsOrBuffer, colsOrByteOffset, dtype, r, c, rowStride ?? c * bpe, colStride ?? bpe);
239
251
  }
240
252
  }
253
+ toString() {
254
+ return `Matrix(${this.rows}\u00d7${this.cols}, ${dtypeToString(this.dtype)})`;
255
+ }
256
+ [Symbol.for('nodejs.util.inspect.custom')]() {
257
+ return this.toString();
258
+ }
241
259
  static fromTypedArray(array, rows, cols, dtype) {
242
260
  const d = dtype ?? inferDtype(array);
243
261
  const buf = array.buffer.slice(array.byteOffset, array.byteOffset + array.byteLength);
@@ -277,6 +295,12 @@ export class PackedMatrix {
277
295
  }
278
296
  dispose() { this._disposed = true; }
279
297
  get disposed() { return this._disposed; }
298
+ toString() {
299
+ return `PackedMatrix(${this.width}\u00d7${this.depth}, ${dtypeToString(this.dtype)}, ${this.byteLength} bytes)`;
300
+ }
301
+ [Symbol.for('nodejs.util.inspect.custom')]() {
302
+ return this.toString();
303
+ }
280
304
  }
281
305
  /**
282
306
  * @brief Determines the output dtype for a given kernel family and input dtype.
@@ -363,6 +387,21 @@ export class Float16Array extends Uint16Array {
363
387
  }
364
388
  this[index] = conversionFunctions.castF32ToF16(value);
365
389
  }
390
+ toString() {
391
+ if (!conversionFunctions)
392
+ return `Float16Array(${this.length})`;
393
+ const limit = Math.min(this.length, 20);
394
+ const parts = [];
395
+ for (let i = 0; i < limit; i++) {
396
+ const f = conversionFunctions.castF16ToF32(this[i]);
397
+ parts.push(`${f} [0x${this[i].toString(16).padStart(4, '0')}]`);
398
+ }
399
+ const suffix = this.length > 20 ? ', ...' : '';
400
+ return `Float16Array(${this.length}) [${parts.join(', ')}${suffix}]`;
401
+ }
402
+ [Symbol.for('nodejs.util.inspect.custom')]() {
403
+ return this.toString();
404
+ }
366
405
  }
367
406
  /**
368
407
  * @brief Brain Float 16 (bf16)
@@ -415,6 +454,21 @@ export class BFloat16Array extends Uint16Array {
415
454
  }
416
455
  this[index] = conversionFunctions.castF32ToBF16(value);
417
456
  }
457
+ toString() {
458
+ if (!conversionFunctions)
459
+ return `BFloat16Array(${this.length})`;
460
+ const limit = Math.min(this.length, 20);
461
+ const parts = [];
462
+ for (let i = 0; i < limit; i++) {
463
+ const f = conversionFunctions.castBF16ToF32(this[i]);
464
+ parts.push(`${f} [0x${this[i].toString(16).padStart(4, '0')}]`);
465
+ }
466
+ const suffix = this.length > 20 ? ', ...' : '';
467
+ return `BFloat16Array(${this.length}) [${parts.join(', ')}${suffix}]`;
468
+ }
469
+ [Symbol.for('nodejs.util.inspect.custom')]() {
470
+ return this.toString();
471
+ }
418
472
  }
419
473
  /**
420
474
  * @brief FP8 E4M3 (4-bit exponent, 3-bit mantissa)
@@ -466,6 +520,21 @@ export class E4M3Array extends Uint8Array {
466
520
  }
467
521
  this[index] = conversionFunctions.castF32ToE4M3(value);
468
522
  }
523
+ toString() {
524
+ if (!conversionFunctions)
525
+ return `E4M3Array(${this.length})`;
526
+ const limit = Math.min(this.length, 20);
527
+ const parts = [];
528
+ for (let i = 0; i < limit; i++) {
529
+ const f = conversionFunctions.castE4M3ToF32(this[i]);
530
+ parts.push(`${f} [0x${this[i].toString(16).padStart(2, '0')}]`);
531
+ }
532
+ const suffix = this.length > 20 ? ', ...' : '';
533
+ return `E4M3Array(${this.length}) [${parts.join(', ')}${suffix}]`;
534
+ }
535
+ [Symbol.for('nodejs.util.inspect.custom')]() {
536
+ return this.toString();
537
+ }
469
538
  }
470
539
  /**
471
540
  * @brief FP8 E5M2 (5-bit exponent, 2-bit mantissa)
@@ -517,6 +586,21 @@ export class E5M2Array extends Uint8Array {
517
586
  }
518
587
  this[index] = conversionFunctions.castF32ToE5M2(value);
519
588
  }
589
+ toString() {
590
+ if (!conversionFunctions)
591
+ return `E5M2Array(${this.length})`;
592
+ const limit = Math.min(this.length, 20);
593
+ const parts = [];
594
+ for (let i = 0; i < limit; i++) {
595
+ const f = conversionFunctions.castE5M2ToF32(this[i]);
596
+ parts.push(`${f} [0x${this[i].toString(16).padStart(2, '0')}]`);
597
+ }
598
+ const suffix = this.length > 20 ? ', ...' : '';
599
+ return `E5M2Array(${this.length}) [${parts.join(', ')}${suffix}]`;
600
+ }
601
+ [Symbol.for('nodejs.util.inspect.custom')]() {
602
+ return this.toString();
603
+ }
520
604
  }
521
605
  /**
522
606
  * @brief Binary Array (u1) - Bit-packed binary vectors
@@ -597,6 +681,18 @@ export class BinaryArray extends Uint8Array {
597
681
  }
598
682
  return binary;
599
683
  }
684
+ toString() {
685
+ const limit = Math.min(this.length, 20);
686
+ const parts = [];
687
+ for (let i = 0; i < limit; i++) {
688
+ parts.push(`0b${this[i].toString(2).padStart(8, '0')}`);
689
+ }
690
+ const suffix = this.length > 20 ? ', ...' : '';
691
+ return `BinaryArray(${this._bitLength}) [${parts.join(', ')}${suffix}]`;
692
+ }
693
+ [Symbol.for('nodejs.util.inspect.custom')]() {
694
+ return this.toString();
695
+ }
600
696
  }
601
697
  /**
602
698
  * @brief Type guard to check if an object is a Float16Array.
@@ -1 +1,4 @@
1
- declare module "node-gyp-build";
1
+ declare module "node-gyp-build" {
2
+ function build(dir: string): any;
3
+ export = build;
4
+ }
@@ -0,0 +1,40 @@
1
+ /**
2
+ * @brief Self-contained browser ESM entry point for NumKong WASM.
3
+ * @file javascript/numkong-browser.ts
4
+ *
5
+ * Auto-initializes the Emscripten module on import via top-level await.
6
+ * The Emscripten glue (`numkong-emscripten.js`) and binary (`numkong.wasm`)
7
+ * must be co-located with this file (same directory or CDN prefix).
8
+ *
9
+ * Usage:
10
+ * import { dot, euclidean } from './numkong.js';
11
+ * console.log(dot(new Float32Array([1,2,3]), new Float32Array([4,5,6])));
12
+ */
13
+
14
+ export {
15
+ TensorBase, VectorBase, VectorView, Vector,
16
+ MatrixBase, Matrix, PackedMatrix,
17
+ DType, TypedArray, KernelFamily,
18
+ dtypeToString, outputDtype,
19
+ Float16Array, BFloat16Array, E4M3Array, E5M2Array, BinaryArray,
20
+ isFloat16Array, isBFloat16Array, isE4M3Array, isE5M2Array, isBinaryArray,
21
+ } from './types.js';
22
+
23
+ import { initWasm } from './numkong-wasm.js';
24
+ export {
25
+ dot, inner, euclidean, sqeuclidean, angular,
26
+ hamming, jaccard, kullbackleibler, jensenshannon,
27
+ getCapabilities, hasCapability,
28
+ dotsPack, dotsPackedSize,
29
+ dotsPacked, angularsPacked, euclideansPacked,
30
+ dotsSymmetric, angularsSymmetric, euclideansSymmetric,
31
+ } from './numkong-wasm.js';
32
+
33
+ // Auto-initialize: load the Emscripten glue relative to this module's URL,
34
+ // instantiate the WASM module, and wire up the wrapper before any export is used.
35
+ const glueUrl = new URL('./numkong-emscripten.js', import.meta.url);
36
+ const { default: NumKongModule } = await import(glueUrl.href);
37
+ const wasmInstance = await NumKongModule({
38
+ locateFile: (path: string) => new URL(path, glueUrl).href,
39
+ });
40
+ initWasm(wasmInstance);
@@ -80,11 +80,8 @@ let isMemory64 = false;
80
80
  let resultPtr: number = 0;
81
81
 
82
82
  // Heap views (created from wasmMemory buffer)
83
- let HEAP8: Int8Array;
84
- let HEAP16: Int16Array;
85
83
  let HEAP32: Int32Array;
86
84
  let HEAPU8: Uint8Array;
87
- let HEAPU16: Uint16Array;
88
85
  let HEAPU32: Uint32Array;
89
86
  let HEAPF32: Float32Array;
90
87
  let HEAPF64: Float64Array;
@@ -106,11 +103,8 @@ export function initWasm(wasmModule: EmscriptenModule): void {
106
103
 
107
104
  // Create heap views from the WASM memory buffer
108
105
  const buffer = wasmModule.wasmMemory.buffer;
109
- HEAP8 = new Int8Array(buffer);
110
- HEAP16 = new Int16Array(buffer);
111
106
  HEAP32 = new Int32Array(buffer);
112
107
  HEAPU8 = new Uint8Array(buffer);
113
- HEAPU16 = new Uint16Array(buffer);
114
108
  HEAPU32 = new Uint32Array(buffer);
115
109
  HEAPF32 = new Float32Array(buffer);
116
110
  HEAPF64 = new Float64Array(buffer);
@@ -139,7 +133,7 @@ export function initWasm(wasmModule: EmscriptenModule): void {
139
133
  interface TypeInfo {
140
134
  dtype: DType;
141
135
  bytesPerElement: number;
142
- heapView: 'HEAP8' | 'HEAP16' | 'HEAP32' | 'HEAPU8' | 'HEAPU16' | 'HEAPU32' | 'HEAPF32' | 'HEAPF64';
136
+ heapView: 'HEAP32' | 'HEAPU8' | 'HEAPU32' | 'HEAPF32' | 'HEAPF64';
143
137
  resultType: 'f32' | 'f64' | 'i32' | 'u32';
144
138
  }
145
139
 
@@ -152,7 +146,7 @@ function detectType(arr: any): TypeInfo {
152
146
  } else if (arr instanceof Float32Array) {
153
147
  return { dtype: DType.F32, bytesPerElement: 4, heapView: 'HEAPF32', resultType: 'f64' };
154
148
  } else if (arr instanceof Int8Array) {
155
- return { dtype: DType.I8, bytesPerElement: 1, heapView: 'HEAP8', resultType: 'i32' };
149
+ return { dtype: DType.I8, bytesPerElement: 1, heapView: 'HEAPU8', resultType: 'i32' };
156
150
  } else if (arr instanceof Uint8Array) {
157
151
  return { dtype: DType.U8, bytesPerElement: 1, heapView: 'HEAPU8', resultType: 'u32' };
158
152
  }
@@ -161,9 +155,9 @@ function detectType(arr: any): TypeInfo {
161
155
  const constructorName = arr.constructor.name;
162
156
 
163
157
  if (constructorName === 'Float16Array') {
164
- return { dtype: DType.F16, bytesPerElement: 2, heapView: 'HEAPU16', resultType: 'f32' };
158
+ return { dtype: DType.F16, bytesPerElement: 2, heapView: 'HEAPU8', resultType: 'f32' };
165
159
  } else if (constructorName === 'BFloat16Array') {
166
- return { dtype: DType.BF16, bytesPerElement: 2, heapView: 'HEAPU16', resultType: 'f32' };
160
+ return { dtype: DType.BF16, bytesPerElement: 2, heapView: 'HEAPU8', resultType: 'f32' };
167
161
  } else if (constructorName === 'E4M3Array') {
168
162
  throw new Error('E4M3 not yet supported in WASM backend');
169
163
  } else if (constructorName === 'E5M2Array') {
@@ -182,9 +176,9 @@ function typeInfoFromDtype(dtype: DType): TypeInfo {
182
176
  switch (dtype) {
183
177
  case DType.F64: return { dtype, bytesPerElement: 8, heapView: 'HEAPF64', resultType: 'f64' };
184
178
  case DType.F32: return { dtype, bytesPerElement: 4, heapView: 'HEAPF32', resultType: 'f64' };
185
- case DType.F16: return { dtype, bytesPerElement: 2, heapView: 'HEAPU16', resultType: 'f32' };
186
- case DType.BF16: return { dtype, bytesPerElement: 2, heapView: 'HEAPU16', resultType: 'f32' };
187
- case DType.I8: return { dtype, bytesPerElement: 1, heapView: 'HEAP8', resultType: 'i32' };
179
+ case DType.F16: return { dtype, bytesPerElement: 2, heapView: 'HEAPU8', resultType: 'f32' };
180
+ case DType.BF16: return { dtype, bytesPerElement: 2, heapView: 'HEAPU8', resultType: 'f32' };
181
+ case DType.I8: return { dtype, bytesPerElement: 1, heapView: 'HEAPU8', resultType: 'i32' };
188
182
  case DType.U8: return { dtype, bytesPerElement: 1, heapView: 'HEAPU8', resultType: 'u32' };
189
183
  case DType.U1: return { dtype, bytesPerElement: 1, heapView: 'HEAPU8', resultType: 'u32' };
190
184
  default: throw new Error(`Unsupported dtype: ${dtype}`);
@@ -65,7 +65,7 @@ static int is_compatible_napi_type(napi_typedarray_type napi_type, nk_dtype_t dt
65
65
  * @param out_dtype The dtype of the value stored in the buffer.
66
66
  * @return napi_value containing the result as a JavaScript Number, or NULL on error.
67
67
  */
68
- static napi_value scalar_to_js_number(napi_env env, nk_scalar_buffer_t const *result, nk_dtype_t out_dtype) {
68
+ static napi_value nk_scalar_buffer_to_js_number(napi_env env, nk_scalar_buffer_t const *result, nk_dtype_t out_dtype) {
69
69
  // i64/u64 must return BigInt since they may exceed Number.MAX_SAFE_INTEGER
70
70
  if (out_dtype == nk_i64_k) {
71
71
  napi_value js_result;
@@ -77,30 +77,9 @@ static napi_value scalar_to_js_number(napi_env env, nk_scalar_buffer_t const *re
77
77
  if (napi_create_bigint_uint64(env, result->u64, &js_result) != napi_ok) return NULL;
78
78
  return js_result;
79
79
  }
80
- double result_f64;
81
- switch (out_dtype) {
82
- case nk_f64_k: result_f64 = (double)result->f64; break;
83
- case nk_f32_k: result_f64 = (double)result->f32; break;
84
- case nk_f16_k: {
85
- nk_f32_t t;
86
- nk_f16_to_f32(&result->f16, &t);
87
- result_f64 = (double)t;
88
- break;
89
- }
90
- case nk_bf16_k: {
91
- nk_f32_t t;
92
- nk_bf16_to_f32(&result->bf16, &t);
93
- result_f64 = (double)t;
94
- break;
95
- }
96
- case nk_i8_k: result_f64 = (double)result->i8; break;
97
- case nk_u8_k: result_f64 = (double)result->u8; break;
98
- case nk_i16_k: result_f64 = (double)result->i16; break;
99
- case nk_u16_k: result_f64 = (double)result->u16; break;
100
- case nk_i32_k: result_f64 = (double)result->i32; break;
101
- case nk_u32_k: result_f64 = (double)result->u32; break;
102
- default: napi_throw_error(env, NULL, "Unexpected output dtype in result conversion"); return NULL;
103
- }
80
+ nk_f64c_t result_c;
81
+ nk_scalar_buffer_to_f64c(result, out_dtype, &result_c);
82
+ double result_f64 = result_c.real;
104
83
  napi_value js_result;
105
84
  if (napi_create_double(env, result_f64, &js_result) != napi_ok) return NULL;
106
85
  return js_result;
@@ -209,7 +188,7 @@ static napi_value dense(napi_env env, napi_callback_info info, nk_kernel_kind_t
209
188
  nk_scalar_buffer_t result;
210
189
  metric(data_a, data_b, dimensions, &result);
211
190
 
212
- return scalar_to_js_number(env, &result, out_dtype);
191
+ return nk_scalar_buffer_to_js_number(env, &result, out_dtype);
213
192
  }
214
193
 
215
194
  /** @brief N-API entry for inner product (dot). */
@@ -26,18 +26,30 @@
26
26
  */
27
27
 
28
28
  import build from "node-gyp-build";
29
+ import { createRequire } from "node:module";
29
30
  import * as path from "node:path";
30
31
  import { existsSync } from "node:fs";
31
32
  import { getFileName, getRoot } from "bindings";
32
33
  import { setConversionFunctions, Float16Array, BFloat16Array, E4M3Array, E5M2Array, BinaryArray, TensorBase, VectorBase, VectorView, Vector, MatrixBase, Matrix, PackedMatrix, DType, dtypeToString, outputDtype, KernelFamily } from "./types.js";
33
34
 
34
- let compiled: any;
35
+ function loadNativeAddon(): any {
36
+ // Tier 1: platform-specific optional dependency (@numkong/<os>-<arch>)
37
+ try {
38
+ const req = createRequire(path.join(getDirName(), "noop.js"));
39
+ return req(`@numkong/${process.platform}-${process.arch}`);
40
+ } catch { }
41
+
42
+ // Tier 2: node-gyp-build fallback (local dev, unsupported platform, build-from-source)
43
+ try {
44
+ return build(getBuildDir(getDirName()));
45
+ } catch { }
35
46
 
36
- try {
37
- let builddir = getBuildDir(getDirName());
38
- compiled = build(builddir);
47
+ return null;
48
+ }
39
49
 
40
- // Initialize conversion functions for types.ts
50
+ let compiled: any = loadNativeAddon();
51
+
52
+ if (compiled) {
41
53
  setConversionFunctions({
42
54
  castF16ToF32: compiled.castF16ToF32,
43
55
  castF32ToF16: compiled.castF32ToF16,
@@ -49,12 +61,11 @@ try {
49
61
  castF32ToE5M2: compiled.castF32ToE5M2,
50
62
  cast: compiled.cast,
51
63
  });
52
- } catch (e) {
53
- // Native addon not available
54
- // For WASM usage, import the Emscripten module directly (see test/test-wasm.mjs)
64
+ } else {
55
65
  throw new Error(
56
- "NumKong native addon not found. Build with `npm run build` or use WASM " +
57
- "by importing the Emscripten module directly. See test/test-wasm.mjs for examples."
66
+ "NumKong native addon not found. Install with `npm install numkong` (which fetches " +
67
+ "the prebuilt binary), or build from source with `npm run install`. " +
68
+ "For WASM, import from 'numkong/wasm' instead."
58
69
  );
59
70
  }
60
71
 
@@ -76,7 +87,7 @@ export const Capability = {
76
87
  SVE: 1n << 10n, // 2020: ARM SVE
77
88
  SVEHALF: 1n << 11n, // 2020: ARM SVE FP16
78
89
  SVESDOT: 1n << 12n, // 2020: ARM SVE i8 dot
79
- SIERRA: 1n << 13n, // 2021: Intel AVX2+VNNI
90
+ ALDER: 1n << 13n, // 2021: Intel AVX2+VNNI
80
91
  SVEBFDOT: 1n << 14n, // 2021: ARM SVE BF16
81
92
  SVE2: 1n << 15n, // 2022: ARM SVE2
82
93
  V128RELAXED: 1n << 16n, // 2022: WASM Relaxed SIMD
@@ -97,6 +108,12 @@ export const Capability = {
97
108
  SMEBF16: 1n << 31n, // 2025+: ARM SME B16B16
98
109
  SMELUT2: 1n << 32n, // 2025+: ARM SME LUTv2
99
110
  RVVBB: 1n << 33n, // 2025+: RISC-V Zvbb
111
+ SIERRA: 1n << 34n, // 2024: Intel AVXVNNIINT8
112
+ SMEBI32: 1n << 35n, // 2025+: ARM SME BI32I32
113
+ LOONGSONASX: 1n << 36n, // LoongArch LASX 256-bit SIMD
114
+ POWERVSX: 1n << 37n, // Power VSX 128-bit SIMD
115
+ DIAMOND: 1n << 38n, // 2025+: Intel AVX10.2
116
+ NEONFP8: 1n << 39n, // ARM NEON FP8
100
117
  } as const;
101
118
 
102
119
  export { Float16Array, BFloat16Array, E4M3Array, E5M2Array, BinaryArray, TensorBase, VectorBase, VectorView, Vector, MatrixBase, Matrix, PackedMatrix, outputDtype };
@@ -571,5 +588,13 @@ function getDirName() {
571
588
  try {
572
589
  if (__dirname) return __dirname;
573
590
  } catch (e) { }
591
+ // Fall back to cwd, which is typically the project root in dev and CI.
592
+ // This helps runtimes like Deno and Bun where the `bindings` module's
593
+ // V8 stack-trace hack may not resolve correctly.
594
+ try {
595
+ const cwd = process.cwd();
596
+ if (existsSync(path.join(cwd, "build")) || existsSync(path.join(cwd, "prebuilds")))
597
+ return cwd;
598
+ } catch (e) { }
574
599
  return getRoot(getFileName());
575
600
  }
@@ -8,6 +8,7 @@
8
8
  "node-gyp-build.d.ts",
9
9
  "numkong.ts",
10
10
  "numkong-wasm.ts",
11
+ "numkong-browser.ts",
11
12
  "types.ts"
12
13
  ],
13
14
  "compilerOptions": {
@@ -4,5 +4,10 @@
4
4
  "module": "commonjs",
5
5
  "outDir": "dist/cjs",
6
6
  "target": "ES2020"
7
- }
7
+ },
8
+ "exclude": [
9
+ "node_modules",
10
+ "dist",
11
+ "numkong-browser.ts"
12
+ ]
8
13
  }