numkong 7.0.0 → 7.4.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +197 -124
- package/binding.gyp +34 -484
- package/c/dispatch_bf16.c +59 -1
- package/c/dispatch_e2m3.c +41 -8
- package/c/dispatch_e3m2.c +49 -8
- package/c/dispatch_e4m3.c +51 -9
- package/c/dispatch_e5m2.c +45 -1
- package/c/dispatch_f16.c +79 -26
- package/c/dispatch_f16c.c +5 -5
- package/c/dispatch_f32.c +56 -0
- package/c/dispatch_f64.c +52 -0
- package/c/dispatch_i4.c +3 -0
- package/c/dispatch_i8.c +62 -3
- package/c/dispatch_other.c +18 -0
- package/c/dispatch_u1.c +54 -9
- package/c/dispatch_u4.c +3 -0
- package/c/dispatch_u8.c +64 -3
- package/c/numkong.c +3 -0
- package/include/README.md +79 -9
- package/include/numkong/attention/sapphireamx.h +278 -276
- package/include/numkong/attention/sme.h +983 -977
- package/include/numkong/attention.h +1 -1
- package/include/numkong/capabilities.h +289 -94
- package/include/numkong/cast/README.md +40 -40
- package/include/numkong/cast/diamond.h +64 -0
- package/include/numkong/cast/haswell.h +42 -194
- package/include/numkong/cast/icelake.h +42 -37
- package/include/numkong/cast/loongsonasx.h +252 -0
- package/include/numkong/cast/neon.h +216 -249
- package/include/numkong/cast/powervsx.h +449 -0
- package/include/numkong/cast/rvv.h +223 -274
- package/include/numkong/cast/sapphire.h +18 -18
- package/include/numkong/cast/serial.h +1018 -944
- package/include/numkong/cast/skylake.h +82 -23
- package/include/numkong/cast/v128relaxed.h +462 -105
- package/include/numkong/cast.h +24 -0
- package/include/numkong/cast.hpp +44 -0
- package/include/numkong/curved/README.md +17 -17
- package/include/numkong/curved/neon.h +131 -7
- package/include/numkong/curved/neonbfdot.h +6 -7
- package/include/numkong/curved/rvv.h +26 -26
- package/include/numkong/curved/smef64.h +186 -182
- package/include/numkong/curved.h +14 -18
- package/include/numkong/dot/README.md +154 -137
- package/include/numkong/dot/alder.h +43 -43
- package/include/numkong/dot/diamond.h +158 -0
- package/include/numkong/dot/genoa.h +4 -30
- package/include/numkong/dot/haswell.h +215 -180
- package/include/numkong/dot/icelake.h +190 -76
- package/include/numkong/dot/loongsonasx.h +671 -0
- package/include/numkong/dot/neon.h +124 -73
- package/include/numkong/dot/neonbfdot.h +11 -12
- package/include/numkong/dot/neonfhm.h +44 -46
- package/include/numkong/dot/neonfp8.h +323 -0
- package/include/numkong/dot/neonsdot.h +190 -76
- package/include/numkong/dot/powervsx.h +752 -0
- package/include/numkong/dot/rvv.h +92 -84
- package/include/numkong/dot/rvvbf16.h +12 -12
- package/include/numkong/dot/rvvhalf.h +12 -12
- package/include/numkong/dot/sapphire.h +4 -4
- package/include/numkong/dot/serial.h +66 -30
- package/include/numkong/dot/sierra.h +31 -31
- package/include/numkong/dot/skylake.h +142 -110
- package/include/numkong/dot/sve.h +217 -177
- package/include/numkong/dot/svebfdot.h +10 -10
- package/include/numkong/dot/svehalf.h +85 -41
- package/include/numkong/dot/svesdot.h +89 -0
- package/include/numkong/dot/v128relaxed.h +124 -89
- package/include/numkong/dot.h +114 -48
- package/include/numkong/dots/README.md +203 -203
- package/include/numkong/dots/alder.h +12 -9
- package/include/numkong/dots/diamond.h +86 -0
- package/include/numkong/dots/genoa.h +10 -4
- package/include/numkong/dots/haswell.h +63 -48
- package/include/numkong/dots/icelake.h +27 -18
- package/include/numkong/dots/loongsonasx.h +176 -0
- package/include/numkong/dots/neon.h +14 -11
- package/include/numkong/dots/neonbfdot.h +4 -3
- package/include/numkong/dots/neonfhm.h +11 -9
- package/include/numkong/dots/neonfp8.h +99 -0
- package/include/numkong/dots/neonsdot.h +48 -12
- package/include/numkong/dots/powervsx.h +194 -0
- package/include/numkong/dots/rvv.h +451 -344
- package/include/numkong/dots/sapphireamx.h +1028 -984
- package/include/numkong/dots/serial.h +213 -197
- package/include/numkong/dots/sierra.h +10 -7
- package/include/numkong/dots/skylake.h +47 -36
- package/include/numkong/dots/sme.h +2001 -2364
- package/include/numkong/dots/smebi32.h +175 -162
- package/include/numkong/dots/smef64.h +328 -323
- package/include/numkong/dots/v128relaxed.h +64 -41
- package/include/numkong/dots.h +573 -293
- package/include/numkong/dots.hpp +45 -43
- package/include/numkong/each/README.md +133 -137
- package/include/numkong/each/haswell.h +6 -6
- package/include/numkong/each/icelake.h +7 -7
- package/include/numkong/each/neon.h +76 -42
- package/include/numkong/each/neonbfdot.h +11 -12
- package/include/numkong/each/neonhalf.h +24 -116
- package/include/numkong/each/rvv.h +28 -28
- package/include/numkong/each/sapphire.h +27 -161
- package/include/numkong/each/serial.h +6 -6
- package/include/numkong/each/skylake.h +7 -7
- package/include/numkong/each/v128relaxed.h +562 -0
- package/include/numkong/each.h +148 -62
- package/include/numkong/each.hpp +2 -2
- package/include/numkong/geospatial/README.md +18 -18
- package/include/numkong/geospatial/haswell.h +365 -325
- package/include/numkong/geospatial/neon.h +350 -306
- package/include/numkong/geospatial/rvv.h +4 -4
- package/include/numkong/geospatial/skylake.h +376 -340
- package/include/numkong/geospatial/v128relaxed.h +366 -327
- package/include/numkong/geospatial.h +17 -17
- package/include/numkong/matrix.hpp +4 -4
- package/include/numkong/maxsim/README.md +14 -14
- package/include/numkong/maxsim/alder.h +6 -6
- package/include/numkong/maxsim/genoa.h +4 -4
- package/include/numkong/maxsim/haswell.h +6 -6
- package/include/numkong/maxsim/icelake.h +18 -18
- package/include/numkong/maxsim/neonsdot.h +21 -21
- package/include/numkong/maxsim/sapphireamx.h +14 -14
- package/include/numkong/maxsim/serial.h +6 -6
- package/include/numkong/maxsim/sme.h +221 -196
- package/include/numkong/maxsim/v128relaxed.h +6 -6
- package/include/numkong/mesh/README.md +62 -56
- package/include/numkong/mesh/haswell.h +339 -464
- package/include/numkong/mesh/neon.h +1100 -519
- package/include/numkong/mesh/neonbfdot.h +36 -68
- package/include/numkong/mesh/rvv.h +530 -435
- package/include/numkong/mesh/serial.h +75 -91
- package/include/numkong/mesh/skylake.h +1627 -302
- package/include/numkong/mesh/v128relaxed.h +443 -330
- package/include/numkong/mesh.h +63 -49
- package/include/numkong/mesh.hpp +4 -4
- package/include/numkong/numkong.h +3 -3
- package/include/numkong/numkong.hpp +1 -0
- package/include/numkong/probability/README.md +23 -19
- package/include/numkong/probability/neon.h +82 -52
- package/include/numkong/probability/rvv.h +28 -23
- package/include/numkong/probability/serial.h +51 -39
- package/include/numkong/probability.h +20 -23
- package/include/numkong/random.h +1 -1
- package/include/numkong/reduce/README.md +143 -138
- package/include/numkong/reduce/alder.h +81 -77
- package/include/numkong/reduce/haswell.h +222 -220
- package/include/numkong/reduce/neon.h +629 -519
- package/include/numkong/reduce/neonbfdot.h +7 -218
- package/include/numkong/reduce/neonfhm.h +9 -381
- package/include/numkong/reduce/neonsdot.h +9 -9
- package/include/numkong/reduce/rvv.h +928 -802
- package/include/numkong/reduce/serial.h +23 -27
- package/include/numkong/reduce/sierra.h +20 -20
- package/include/numkong/reduce/skylake.h +326 -324
- package/include/numkong/reduce/v128relaxed.h +52 -52
- package/include/numkong/reduce.h +4 -23
- package/include/numkong/reduce.hpp +156 -11
- package/include/numkong/scalar/README.md +6 -6
- package/include/numkong/scalar/haswell.h +26 -17
- package/include/numkong/scalar/loongsonasx.h +74 -0
- package/include/numkong/scalar/neon.h +9 -9
- package/include/numkong/scalar/powervsx.h +96 -0
- package/include/numkong/scalar/rvv.h +2 -2
- package/include/numkong/scalar/sapphire.h +21 -10
- package/include/numkong/scalar/serial.h +21 -21
- package/include/numkong/scalar.h +13 -0
- package/include/numkong/set/README.md +28 -28
- package/include/numkong/set/haswell.h +12 -12
- package/include/numkong/set/icelake.h +14 -14
- package/include/numkong/set/loongsonasx.h +181 -0
- package/include/numkong/set/neon.h +17 -18
- package/include/numkong/set/powervsx.h +326 -0
- package/include/numkong/set/rvv.h +4 -4
- package/include/numkong/set/serial.h +6 -6
- package/include/numkong/set/sve.h +60 -59
- package/include/numkong/set/v128relaxed.h +6 -6
- package/include/numkong/set.h +21 -7
- package/include/numkong/sets/README.md +26 -26
- package/include/numkong/sets/loongsonasx.h +52 -0
- package/include/numkong/sets/powervsx.h +65 -0
- package/include/numkong/sets/smebi32.h +395 -364
- package/include/numkong/sets.h +83 -40
- package/include/numkong/sparse/README.md +4 -4
- package/include/numkong/sparse/icelake.h +101 -101
- package/include/numkong/sparse/serial.h +1 -1
- package/include/numkong/sparse/sve2.h +137 -141
- package/include/numkong/sparse/turin.h +12 -12
- package/include/numkong/sparse.h +10 -10
- package/include/numkong/spatial/README.md +230 -226
- package/include/numkong/spatial/alder.h +113 -116
- package/include/numkong/spatial/diamond.h +240 -0
- package/include/numkong/spatial/genoa.h +0 -68
- package/include/numkong/spatial/haswell.h +74 -55
- package/include/numkong/spatial/icelake.h +539 -58
- package/include/numkong/spatial/loongsonasx.h +483 -0
- package/include/numkong/spatial/neon.h +125 -52
- package/include/numkong/spatial/neonbfdot.h +8 -9
- package/include/numkong/spatial/neonfp8.h +258 -0
- package/include/numkong/spatial/neonsdot.h +180 -12
- package/include/numkong/spatial/powervsx.h +738 -0
- package/include/numkong/spatial/rvv.h +146 -139
- package/include/numkong/spatial/rvvbf16.h +17 -12
- package/include/numkong/spatial/rvvhalf.h +13 -10
- package/include/numkong/spatial/serial.h +13 -12
- package/include/numkong/spatial/sierra.h +232 -39
- package/include/numkong/spatial/skylake.h +73 -74
- package/include/numkong/spatial/sve.h +93 -72
- package/include/numkong/spatial/svebfdot.h +29 -29
- package/include/numkong/spatial/svehalf.h +52 -26
- package/include/numkong/spatial/svesdot.h +142 -0
- package/include/numkong/spatial/v128relaxed.h +293 -41
- package/include/numkong/spatial.h +338 -82
- package/include/numkong/spatials/README.md +194 -194
- package/include/numkong/spatials/diamond.h +82 -0
- package/include/numkong/spatials/haswell.h +2 -2
- package/include/numkong/spatials/loongsonasx.h +153 -0
- package/include/numkong/spatials/neonfp8.h +111 -0
- package/include/numkong/spatials/neonsdot.h +34 -0
- package/include/numkong/spatials/powervsx.h +153 -0
- package/include/numkong/spatials/rvv.h +259 -243
- package/include/numkong/spatials/sapphireamx.h +173 -173
- package/include/numkong/spatials/serial.h +2 -2
- package/include/numkong/spatials/skylake.h +2 -2
- package/include/numkong/spatials/sme.h +590 -605
- package/include/numkong/spatials/smef64.h +139 -130
- package/include/numkong/spatials/v128relaxed.h +2 -2
- package/include/numkong/spatials.h +820 -500
- package/include/numkong/spatials.hpp +49 -48
- package/include/numkong/tensor.hpp +406 -17
- package/include/numkong/trigonometry/README.md +19 -19
- package/include/numkong/trigonometry/haswell.h +402 -401
- package/include/numkong/trigonometry/neon.h +386 -387
- package/include/numkong/trigonometry/rvv.h +52 -51
- package/include/numkong/trigonometry/serial.h +13 -13
- package/include/numkong/trigonometry/skylake.h +373 -369
- package/include/numkong/trigonometry/v128relaxed.h +375 -374
- package/include/numkong/trigonometry.h +13 -13
- package/include/numkong/trigonometry.hpp +2 -2
- package/include/numkong/types.h +287 -49
- package/include/numkong/types.hpp +436 -12
- package/include/numkong/vector.hpp +82 -14
- package/javascript/dist/cjs/numkong-wasm.js +6 -12
- package/javascript/dist/cjs/numkong.d.ts +7 -1
- package/javascript/dist/cjs/numkong.js +37 -11
- package/javascript/dist/cjs/types.d.ts +9 -0
- package/javascript/dist/cjs/types.js +96 -0
- package/javascript/dist/esm/numkong-browser.d.ts +14 -0
- package/javascript/dist/esm/numkong-browser.js +23 -0
- package/javascript/dist/esm/numkong-wasm.js +6 -12
- package/javascript/dist/esm/numkong.d.ts +7 -1
- package/javascript/dist/esm/numkong.js +37 -11
- package/javascript/dist/esm/types.d.ts +9 -0
- package/javascript/dist/esm/types.js +96 -0
- package/javascript/node-gyp-build.d.ts +4 -1
- package/javascript/numkong-browser.ts +40 -0
- package/javascript/numkong-wasm.ts +7 -13
- package/javascript/numkong.c +5 -26
- package/javascript/numkong.ts +36 -11
- package/javascript/tsconfig-base.json +1 -0
- package/javascript/tsconfig-cjs.json +6 -1
- package/javascript/types.ts +110 -0
- package/numkong.gypi +101 -0
- package/package.json +34 -13
- package/probes/arm_neon.c +8 -0
- package/probes/arm_neon_bfdot.c +9 -0
- package/probes/arm_neon_fhm.c +9 -0
- package/probes/arm_neon_half.c +8 -0
- package/probes/arm_neon_sdot.c +9 -0
- package/probes/arm_neonfp8.c +9 -0
- package/probes/arm_sme.c +16 -0
- package/probes/arm_sme2.c +16 -0
- package/probes/arm_sme2p1.c +16 -0
- package/probes/arm_sme_bf16.c +16 -0
- package/probes/arm_sme_bi32.c +16 -0
- package/probes/arm_sme_f64.c +16 -0
- package/probes/arm_sme_fa64.c +14 -0
- package/probes/arm_sme_half.c +16 -0
- package/probes/arm_sme_lut2.c +15 -0
- package/probes/arm_sve.c +18 -0
- package/probes/arm_sve2.c +20 -0
- package/probes/arm_sve2p1.c +18 -0
- package/probes/arm_sve_bfdot.c +20 -0
- package/probes/arm_sve_half.c +18 -0
- package/probes/arm_sve_sdot.c +21 -0
- package/probes/loongarch_lasx.c +12 -0
- package/probes/power_vsx.c +12 -0
- package/probes/probe.js +127 -0
- package/probes/riscv_rvv.c +14 -0
- package/probes/riscv_rvv_bb.c +15 -0
- package/probes/riscv_rvv_bf16.c +17 -0
- package/probes/riscv_rvv_half.c +14 -0
- package/probes/wasm_v128relaxed.c +11 -0
- package/probes/x86_alder.c +17 -0
- package/probes/x86_diamond.c +17 -0
- package/probes/x86_genoa.c +17 -0
- package/probes/x86_graniteamx.c +19 -0
- package/probes/x86_haswell.c +11 -0
- package/probes/x86_icelake.c +17 -0
- package/probes/x86_sapphire.c +16 -0
- package/probes/x86_sapphireamx.c +18 -0
- package/probes/x86_sierra.c +17 -0
- package/probes/x86_skylake.c +15 -0
- package/probes/x86_turin.c +17 -0
- package/wasm/numkong-emscripten.js +2 -0
- package/wasm/numkong.d.ts +14 -0
- package/wasm/numkong.js +1124 -0
- package/wasm/numkong.wasm +0 -0
- package/include/numkong/curved/neonhalf.h +0 -212
- package/include/numkong/dot/neonhalf.h +0 -198
- package/include/numkong/dots/neonhalf.h +0 -57
- package/include/numkong/mesh/neonhalf.h +0 -616
- package/include/numkong/reduce/neonhalf.h +0 -157
- package/include/numkong/spatial/neonhalf.h +0 -118
- package/include/numkong/spatial/sapphire.h +0 -343
- package/include/numkong/spatials/neonhalf.h +0 -58
- package/javascript/README.md +0 -246
|
@@ -43,7 +43,7 @@ export declare const Capability: {
|
|
|
43
43
|
readonly SVE: bigint;
|
|
44
44
|
readonly SVEHALF: bigint;
|
|
45
45
|
readonly SVESDOT: bigint;
|
|
46
|
-
readonly
|
|
46
|
+
readonly ALDER: bigint;
|
|
47
47
|
readonly SVEBFDOT: bigint;
|
|
48
48
|
readonly SVE2: bigint;
|
|
49
49
|
readonly V128RELAXED: bigint;
|
|
@@ -64,6 +64,12 @@ export declare const Capability: {
|
|
|
64
64
|
readonly SMEBF16: bigint;
|
|
65
65
|
readonly SMELUT2: bigint;
|
|
66
66
|
readonly RVVBB: bigint;
|
|
67
|
+
readonly SIERRA: bigint;
|
|
68
|
+
readonly SMEBI32: bigint;
|
|
69
|
+
readonly LOONGSONASX: bigint;
|
|
70
|
+
readonly POWERVSX: bigint;
|
|
71
|
+
readonly DIAMOND: bigint;
|
|
72
|
+
readonly NEONFP8: bigint;
|
|
67
73
|
};
|
|
68
74
|
export { Float16Array, BFloat16Array, E4M3Array, E5M2Array, BinaryArray, TensorBase, VectorBase, VectorView, Vector, MatrixBase, Matrix, PackedMatrix, outputDtype };
|
|
69
75
|
/** Convert a single FP16 value (as uint16 bits) to FP32 */
|
|
@@ -25,15 +25,27 @@
|
|
|
25
25
|
* ```
|
|
26
26
|
*/
|
|
27
27
|
import build from "node-gyp-build";
|
|
28
|
+
import { createRequire } from "node:module";
|
|
28
29
|
import * as path from "node:path";
|
|
29
30
|
import { existsSync } from "node:fs";
|
|
30
31
|
import { getFileName, getRoot } from "bindings";
|
|
31
32
|
import { setConversionFunctions, Float16Array, BFloat16Array, E4M3Array, E5M2Array, BinaryArray, TensorBase, VectorBase, VectorView, Vector, MatrixBase, Matrix, PackedMatrix, DType, dtypeToString, outputDtype } from "./types.js";
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
33
|
+
function loadNativeAddon() {
|
|
34
|
+
// Tier 1: platform-specific optional dependency (@numkong/<os>-<arch>)
|
|
35
|
+
try {
|
|
36
|
+
const req = createRequire(path.join(getDirName(), "noop.js"));
|
|
37
|
+
return req(`@numkong/${process.platform}-${process.arch}`);
|
|
38
|
+
}
|
|
39
|
+
catch { }
|
|
40
|
+
// Tier 2: node-gyp-build fallback (local dev, unsupported platform, build-from-source)
|
|
41
|
+
try {
|
|
42
|
+
return build(getBuildDir(getDirName()));
|
|
43
|
+
}
|
|
44
|
+
catch { }
|
|
45
|
+
return null;
|
|
46
|
+
}
|
|
47
|
+
let compiled = loadNativeAddon();
|
|
48
|
+
if (compiled) {
|
|
37
49
|
setConversionFunctions({
|
|
38
50
|
castF16ToF32: compiled.castF16ToF32,
|
|
39
51
|
castF32ToF16: compiled.castF32ToF16,
|
|
@@ -46,11 +58,10 @@ try {
|
|
|
46
58
|
cast: compiled.cast,
|
|
47
59
|
});
|
|
48
60
|
}
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
"by importing the Emscripten module directly. See test/test-wasm.mjs for examples.");
|
|
61
|
+
else {
|
|
62
|
+
throw new Error("NumKong native addon not found. Install with `npm install numkong` (which fetches " +
|
|
63
|
+
"the prebuilt binary), or build from source with `npm run install`. " +
|
|
64
|
+
"For WASM, import from 'numkong/wasm' instead.");
|
|
54
65
|
}
|
|
55
66
|
/**
|
|
56
67
|
* CPU capability bit masks in chronological order (by first commercial silicon).
|
|
@@ -70,7 +81,7 @@ export const Capability = {
|
|
|
70
81
|
SVE: 1n << 10n, // 2020: ARM SVE
|
|
71
82
|
SVEHALF: 1n << 11n, // 2020: ARM SVE FP16
|
|
72
83
|
SVESDOT: 1n << 12n, // 2020: ARM SVE i8 dot
|
|
73
|
-
|
|
84
|
+
ALDER: 1n << 13n, // 2021: Intel AVX2+VNNI
|
|
74
85
|
SVEBFDOT: 1n << 14n, // 2021: ARM SVE BF16
|
|
75
86
|
SVE2: 1n << 15n, // 2022: ARM SVE2
|
|
76
87
|
V128RELAXED: 1n << 16n, // 2022: WASM Relaxed SIMD
|
|
@@ -91,6 +102,12 @@ export const Capability = {
|
|
|
91
102
|
SMEBF16: 1n << 31n, // 2025+: ARM SME B16B16
|
|
92
103
|
SMELUT2: 1n << 32n, // 2025+: ARM SME LUTv2
|
|
93
104
|
RVVBB: 1n << 33n, // 2025+: RISC-V Zvbb
|
|
105
|
+
SIERRA: 1n << 34n, // 2024: Intel AVXVNNIINT8
|
|
106
|
+
SMEBI32: 1n << 35n, // 2025+: ARM SME BI32I32
|
|
107
|
+
LOONGSONASX: 1n << 36n, // LoongArch LASX 256-bit SIMD
|
|
108
|
+
POWERVSX: 1n << 37n, // Power VSX 128-bit SIMD
|
|
109
|
+
DIAMOND: 1n << 38n, // 2025+: Intel AVX10.2
|
|
110
|
+
NEONFP8: 1n << 39n, // ARM NEON FP8
|
|
94
111
|
};
|
|
95
112
|
export { Float16Array, BFloat16Array, E4M3Array, E5M2Array, BinaryArray, TensorBase, VectorBase, VectorView, Vector, MatrixBase, Matrix, PackedMatrix, outputDtype };
|
|
96
113
|
/** Convert a single FP16 value (as uint16 bits) to FP32 */
|
|
@@ -448,5 +465,14 @@ function getDirName() {
|
|
|
448
465
|
return __dirname;
|
|
449
466
|
}
|
|
450
467
|
catch (e) { }
|
|
468
|
+
// Fall back to cwd, which is typically the project root in dev and CI.
|
|
469
|
+
// This helps runtimes like Deno and Bun where the `bindings` module's
|
|
470
|
+
// V8 stack-trace hack may not resolve correctly.
|
|
471
|
+
try {
|
|
472
|
+
const cwd = process.cwd();
|
|
473
|
+
if (existsSync(path.join(cwd, "build")) || existsSync(path.join(cwd, "prebuilds")))
|
|
474
|
+
return cwd;
|
|
475
|
+
}
|
|
476
|
+
catch (e) { }
|
|
451
477
|
return getRoot(getFileName());
|
|
452
478
|
}
|
|
@@ -74,6 +74,7 @@ export declare abstract class VectorBase extends TensorBase {
|
|
|
74
74
|
*/
|
|
75
75
|
export declare class VectorView extends VectorBase {
|
|
76
76
|
constructor(buffer: ArrayBuffer, byteOffset: number, length: number, dtype: DType);
|
|
77
|
+
toString(): string;
|
|
77
78
|
/** @brief Create a VectorView from any TypedArray, inferring or accepting dtype. */
|
|
78
79
|
static from(arr: TypedArray, dtype?: DType): VectorView;
|
|
79
80
|
}
|
|
@@ -86,6 +87,7 @@ export declare class VectorView extends VectorBase {
|
|
|
86
87
|
export declare class Vector extends VectorBase {
|
|
87
88
|
constructor(length: number, dtype: DType);
|
|
88
89
|
constructor(buffer: ArrayBuffer, length: number, dtype: DType);
|
|
90
|
+
toString(): string;
|
|
89
91
|
/** @brief Create an owning Vector by copying data from a TypedArray. */
|
|
90
92
|
static fromTypedArray(arr: TypedArray, dtype?: DType): Vector;
|
|
91
93
|
/** @brief Create an owning Vector by copying data from any TensorBase. */
|
|
@@ -117,6 +119,7 @@ export declare abstract class MatrixBase extends TensorBase {
|
|
|
117
119
|
export declare class Matrix extends MatrixBase {
|
|
118
120
|
constructor(rows: number, cols: number, dtype: DType);
|
|
119
121
|
constructor(buffer: ArrayBuffer, byteOffset: number, dtype: DType, rows: number, cols: number, rowStride?: number, colStride?: number);
|
|
122
|
+
toString(): string;
|
|
120
123
|
static fromTypedArray(array: TypedArray, rows: number, cols: number, dtype?: DType): Matrix;
|
|
121
124
|
toTypedArray(): TypedArray;
|
|
122
125
|
row(index: number): VectorView;
|
|
@@ -138,6 +141,7 @@ export declare class PackedMatrix {
|
|
|
138
141
|
constructor(buffer: ArrayBuffer, width: number, depth: number, dtype: DType, byteLength: number);
|
|
139
142
|
dispose(): void;
|
|
140
143
|
get disposed(): boolean;
|
|
144
|
+
toString(): string;
|
|
141
145
|
}
|
|
142
146
|
/** @brief Kernel family identifiers for output dtype resolution. */
|
|
143
147
|
export type KernelFamily = 'dots' | 'angulars' | 'euclideans';
|
|
@@ -174,6 +178,7 @@ export declare class Float16Array extends Uint16Array {
|
|
|
174
178
|
* @param value f32 value to encode and store
|
|
175
179
|
*/
|
|
176
180
|
setFloat32(index: number, value: number): void;
|
|
181
|
+
toString(): string;
|
|
177
182
|
}
|
|
178
183
|
/**
|
|
179
184
|
* @brief Brain Float 16 (bf16)
|
|
@@ -190,6 +195,7 @@ export declare class BFloat16Array extends Uint16Array {
|
|
|
190
195
|
toFloat32Array(): Float32Array;
|
|
191
196
|
getFloat32(index: number): number;
|
|
192
197
|
setFloat32(index: number, value: number): void;
|
|
198
|
+
toString(): string;
|
|
193
199
|
}
|
|
194
200
|
/**
|
|
195
201
|
* @brief FP8 E4M3 (4-bit exponent, 3-bit mantissa)
|
|
@@ -205,6 +211,7 @@ export declare class E4M3Array extends Uint8Array {
|
|
|
205
211
|
toFloat32Array(): Float32Array;
|
|
206
212
|
getFloat32(index: number): number;
|
|
207
213
|
setFloat32(index: number, value: number): void;
|
|
214
|
+
toString(): string;
|
|
208
215
|
}
|
|
209
216
|
/**
|
|
210
217
|
* @brief FP8 E5M2 (5-bit exponent, 2-bit mantissa)
|
|
@@ -220,6 +227,7 @@ export declare class E5M2Array extends Uint8Array {
|
|
|
220
227
|
toFloat32Array(): Float32Array;
|
|
221
228
|
getFloat32(index: number): number;
|
|
222
229
|
setFloat32(index: number, value: number): void;
|
|
230
|
+
toString(): string;
|
|
223
231
|
}
|
|
224
232
|
/**
|
|
225
233
|
* @brief Binary Array (u1) - Bit-packed binary vectors
|
|
@@ -260,6 +268,7 @@ export declare class BinaryArray extends Uint8Array {
|
|
|
260
268
|
* @returns Binary array with quantized values
|
|
261
269
|
*/
|
|
262
270
|
static fromFloat64Array(vector: Float64Array): BinaryArray;
|
|
271
|
+
toString(): string;
|
|
263
272
|
}
|
|
264
273
|
/**
|
|
265
274
|
* @brief Type guard to check if an object is a Float16Array.
|
|
@@ -102,6 +102,12 @@ export class VectorView extends VectorBase {
|
|
|
102
102
|
constructor(buffer, byteOffset, length, dtype) {
|
|
103
103
|
super(buffer, byteOffset, length, dtype);
|
|
104
104
|
}
|
|
105
|
+
toString() {
|
|
106
|
+
return `VectorView(${this.length}, ${dtypeToString(this.dtype)})`;
|
|
107
|
+
}
|
|
108
|
+
[Symbol.for('nodejs.util.inspect.custom')]() {
|
|
109
|
+
return this.toString();
|
|
110
|
+
}
|
|
105
111
|
/** @brief Create a VectorView from any TypedArray, inferring or accepting dtype. */
|
|
106
112
|
static from(arr, dtype) {
|
|
107
113
|
const d = dtype ?? inferDtype(arr);
|
|
@@ -143,6 +149,12 @@ export class Vector extends VectorBase {
|
|
|
143
149
|
super(lengthOrBuffer, 0, dtypeOrLength, dtype);
|
|
144
150
|
}
|
|
145
151
|
}
|
|
152
|
+
toString() {
|
|
153
|
+
return `Vector(${this.length}, ${dtypeToString(this.dtype)})`;
|
|
154
|
+
}
|
|
155
|
+
[Symbol.for('nodejs.util.inspect.custom')]() {
|
|
156
|
+
return this.toString();
|
|
157
|
+
}
|
|
146
158
|
/** @brief Create an owning Vector by copying data from a TypedArray. */
|
|
147
159
|
static fromTypedArray(arr, dtype) {
|
|
148
160
|
const d = dtype ?? inferDtype(arr);
|
|
@@ -238,6 +250,12 @@ export class Matrix extends MatrixBase {
|
|
|
238
250
|
super(rowsOrBuffer, colsOrByteOffset, dtype, r, c, rowStride ?? c * bpe, colStride ?? bpe);
|
|
239
251
|
}
|
|
240
252
|
}
|
|
253
|
+
toString() {
|
|
254
|
+
return `Matrix(${this.rows}\u00d7${this.cols}, ${dtypeToString(this.dtype)})`;
|
|
255
|
+
}
|
|
256
|
+
[Symbol.for('nodejs.util.inspect.custom')]() {
|
|
257
|
+
return this.toString();
|
|
258
|
+
}
|
|
241
259
|
static fromTypedArray(array, rows, cols, dtype) {
|
|
242
260
|
const d = dtype ?? inferDtype(array);
|
|
243
261
|
const buf = array.buffer.slice(array.byteOffset, array.byteOffset + array.byteLength);
|
|
@@ -277,6 +295,12 @@ export class PackedMatrix {
|
|
|
277
295
|
}
|
|
278
296
|
dispose() { this._disposed = true; }
|
|
279
297
|
get disposed() { return this._disposed; }
|
|
298
|
+
toString() {
|
|
299
|
+
return `PackedMatrix(${this.width}\u00d7${this.depth}, ${dtypeToString(this.dtype)}, ${this.byteLength} bytes)`;
|
|
300
|
+
}
|
|
301
|
+
[Symbol.for('nodejs.util.inspect.custom')]() {
|
|
302
|
+
return this.toString();
|
|
303
|
+
}
|
|
280
304
|
}
|
|
281
305
|
/**
|
|
282
306
|
* @brief Determines the output dtype for a given kernel family and input dtype.
|
|
@@ -363,6 +387,21 @@ export class Float16Array extends Uint16Array {
|
|
|
363
387
|
}
|
|
364
388
|
this[index] = conversionFunctions.castF32ToF16(value);
|
|
365
389
|
}
|
|
390
|
+
toString() {
|
|
391
|
+
if (!conversionFunctions)
|
|
392
|
+
return `Float16Array(${this.length})`;
|
|
393
|
+
const limit = Math.min(this.length, 20);
|
|
394
|
+
const parts = [];
|
|
395
|
+
for (let i = 0; i < limit; i++) {
|
|
396
|
+
const f = conversionFunctions.castF16ToF32(this[i]);
|
|
397
|
+
parts.push(`${f} [0x${this[i].toString(16).padStart(4, '0')}]`);
|
|
398
|
+
}
|
|
399
|
+
const suffix = this.length > 20 ? ', ...' : '';
|
|
400
|
+
return `Float16Array(${this.length}) [${parts.join(', ')}${suffix}]`;
|
|
401
|
+
}
|
|
402
|
+
[Symbol.for('nodejs.util.inspect.custom')]() {
|
|
403
|
+
return this.toString();
|
|
404
|
+
}
|
|
366
405
|
}
|
|
367
406
|
/**
|
|
368
407
|
* @brief Brain Float 16 (bf16)
|
|
@@ -415,6 +454,21 @@ export class BFloat16Array extends Uint16Array {
|
|
|
415
454
|
}
|
|
416
455
|
this[index] = conversionFunctions.castF32ToBF16(value);
|
|
417
456
|
}
|
|
457
|
+
toString() {
|
|
458
|
+
if (!conversionFunctions)
|
|
459
|
+
return `BFloat16Array(${this.length})`;
|
|
460
|
+
const limit = Math.min(this.length, 20);
|
|
461
|
+
const parts = [];
|
|
462
|
+
for (let i = 0; i < limit; i++) {
|
|
463
|
+
const f = conversionFunctions.castBF16ToF32(this[i]);
|
|
464
|
+
parts.push(`${f} [0x${this[i].toString(16).padStart(4, '0')}]`);
|
|
465
|
+
}
|
|
466
|
+
const suffix = this.length > 20 ? ', ...' : '';
|
|
467
|
+
return `BFloat16Array(${this.length}) [${parts.join(', ')}${suffix}]`;
|
|
468
|
+
}
|
|
469
|
+
[Symbol.for('nodejs.util.inspect.custom')]() {
|
|
470
|
+
return this.toString();
|
|
471
|
+
}
|
|
418
472
|
}
|
|
419
473
|
/**
|
|
420
474
|
* @brief FP8 E4M3 (4-bit exponent, 3-bit mantissa)
|
|
@@ -466,6 +520,21 @@ export class E4M3Array extends Uint8Array {
|
|
|
466
520
|
}
|
|
467
521
|
this[index] = conversionFunctions.castF32ToE4M3(value);
|
|
468
522
|
}
|
|
523
|
+
toString() {
|
|
524
|
+
if (!conversionFunctions)
|
|
525
|
+
return `E4M3Array(${this.length})`;
|
|
526
|
+
const limit = Math.min(this.length, 20);
|
|
527
|
+
const parts = [];
|
|
528
|
+
for (let i = 0; i < limit; i++) {
|
|
529
|
+
const f = conversionFunctions.castE4M3ToF32(this[i]);
|
|
530
|
+
parts.push(`${f} [0x${this[i].toString(16).padStart(2, '0')}]`);
|
|
531
|
+
}
|
|
532
|
+
const suffix = this.length > 20 ? ', ...' : '';
|
|
533
|
+
return `E4M3Array(${this.length}) [${parts.join(', ')}${suffix}]`;
|
|
534
|
+
}
|
|
535
|
+
[Symbol.for('nodejs.util.inspect.custom')]() {
|
|
536
|
+
return this.toString();
|
|
537
|
+
}
|
|
469
538
|
}
|
|
470
539
|
/**
|
|
471
540
|
* @brief FP8 E5M2 (5-bit exponent, 2-bit mantissa)
|
|
@@ -517,6 +586,21 @@ export class E5M2Array extends Uint8Array {
|
|
|
517
586
|
}
|
|
518
587
|
this[index] = conversionFunctions.castF32ToE5M2(value);
|
|
519
588
|
}
|
|
589
|
+
toString() {
|
|
590
|
+
if (!conversionFunctions)
|
|
591
|
+
return `E5M2Array(${this.length})`;
|
|
592
|
+
const limit = Math.min(this.length, 20);
|
|
593
|
+
const parts = [];
|
|
594
|
+
for (let i = 0; i < limit; i++) {
|
|
595
|
+
const f = conversionFunctions.castE5M2ToF32(this[i]);
|
|
596
|
+
parts.push(`${f} [0x${this[i].toString(16).padStart(2, '0')}]`);
|
|
597
|
+
}
|
|
598
|
+
const suffix = this.length > 20 ? ', ...' : '';
|
|
599
|
+
return `E5M2Array(${this.length}) [${parts.join(', ')}${suffix}]`;
|
|
600
|
+
}
|
|
601
|
+
[Symbol.for('nodejs.util.inspect.custom')]() {
|
|
602
|
+
return this.toString();
|
|
603
|
+
}
|
|
520
604
|
}
|
|
521
605
|
/**
|
|
522
606
|
* @brief Binary Array (u1) - Bit-packed binary vectors
|
|
@@ -597,6 +681,18 @@ export class BinaryArray extends Uint8Array {
|
|
|
597
681
|
}
|
|
598
682
|
return binary;
|
|
599
683
|
}
|
|
684
|
+
toString() {
|
|
685
|
+
const limit = Math.min(this.length, 20);
|
|
686
|
+
const parts = [];
|
|
687
|
+
for (let i = 0; i < limit; i++) {
|
|
688
|
+
parts.push(`0b${this[i].toString(2).padStart(8, '0')}`);
|
|
689
|
+
}
|
|
690
|
+
const suffix = this.length > 20 ? ', ...' : '';
|
|
691
|
+
return `BinaryArray(${this._bitLength}) [${parts.join(', ')}${suffix}]`;
|
|
692
|
+
}
|
|
693
|
+
[Symbol.for('nodejs.util.inspect.custom')]() {
|
|
694
|
+
return this.toString();
|
|
695
|
+
}
|
|
600
696
|
}
|
|
601
697
|
/**
|
|
602
698
|
* @brief Type guard to check if an object is a Float16Array.
|
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @brief Self-contained browser ESM entry point for NumKong WASM.
|
|
3
|
+
* @file javascript/numkong-browser.ts
|
|
4
|
+
*
|
|
5
|
+
* Auto-initializes the Emscripten module on import via top-level await.
|
|
6
|
+
* The Emscripten glue (`numkong-emscripten.js`) and binary (`numkong.wasm`)
|
|
7
|
+
* must be co-located with this file (same directory or CDN prefix).
|
|
8
|
+
*
|
|
9
|
+
* Usage:
|
|
10
|
+
* import { dot, euclidean } from './numkong.js';
|
|
11
|
+
* console.log(dot(new Float32Array([1,2,3]), new Float32Array([4,5,6])));
|
|
12
|
+
*/
|
|
13
|
+
|
|
14
|
+
export {
|
|
15
|
+
TensorBase, VectorBase, VectorView, Vector,
|
|
16
|
+
MatrixBase, Matrix, PackedMatrix,
|
|
17
|
+
DType, TypedArray, KernelFamily,
|
|
18
|
+
dtypeToString, outputDtype,
|
|
19
|
+
Float16Array, BFloat16Array, E4M3Array, E5M2Array, BinaryArray,
|
|
20
|
+
isFloat16Array, isBFloat16Array, isE4M3Array, isE5M2Array, isBinaryArray,
|
|
21
|
+
} from './types.js';
|
|
22
|
+
|
|
23
|
+
import { initWasm } from './numkong-wasm.js';
|
|
24
|
+
export {
|
|
25
|
+
dot, inner, euclidean, sqeuclidean, angular,
|
|
26
|
+
hamming, jaccard, kullbackleibler, jensenshannon,
|
|
27
|
+
getCapabilities, hasCapability,
|
|
28
|
+
dotsPack, dotsPackedSize,
|
|
29
|
+
dotsPacked, angularsPacked, euclideansPacked,
|
|
30
|
+
dotsSymmetric, angularsSymmetric, euclideansSymmetric,
|
|
31
|
+
} from './numkong-wasm.js';
|
|
32
|
+
|
|
33
|
+
// Auto-initialize: load the Emscripten glue relative to this module's URL,
|
|
34
|
+
// instantiate the WASM module, and wire up the wrapper before any export is used.
|
|
35
|
+
const glueUrl = new URL('./numkong-emscripten.js', import.meta.url);
|
|
36
|
+
const { default: NumKongModule } = await import(glueUrl.href);
|
|
37
|
+
const wasmInstance = await NumKongModule({
|
|
38
|
+
locateFile: (path: string) => new URL(path, glueUrl).href,
|
|
39
|
+
});
|
|
40
|
+
initWasm(wasmInstance);
|
|
@@ -80,11 +80,8 @@ let isMemory64 = false;
|
|
|
80
80
|
let resultPtr: number = 0;
|
|
81
81
|
|
|
82
82
|
// Heap views (created from wasmMemory buffer)
|
|
83
|
-
let HEAP8: Int8Array;
|
|
84
|
-
let HEAP16: Int16Array;
|
|
85
83
|
let HEAP32: Int32Array;
|
|
86
84
|
let HEAPU8: Uint8Array;
|
|
87
|
-
let HEAPU16: Uint16Array;
|
|
88
85
|
let HEAPU32: Uint32Array;
|
|
89
86
|
let HEAPF32: Float32Array;
|
|
90
87
|
let HEAPF64: Float64Array;
|
|
@@ -106,11 +103,8 @@ export function initWasm(wasmModule: EmscriptenModule): void {
|
|
|
106
103
|
|
|
107
104
|
// Create heap views from the WASM memory buffer
|
|
108
105
|
const buffer = wasmModule.wasmMemory.buffer;
|
|
109
|
-
HEAP8 = new Int8Array(buffer);
|
|
110
|
-
HEAP16 = new Int16Array(buffer);
|
|
111
106
|
HEAP32 = new Int32Array(buffer);
|
|
112
107
|
HEAPU8 = new Uint8Array(buffer);
|
|
113
|
-
HEAPU16 = new Uint16Array(buffer);
|
|
114
108
|
HEAPU32 = new Uint32Array(buffer);
|
|
115
109
|
HEAPF32 = new Float32Array(buffer);
|
|
116
110
|
HEAPF64 = new Float64Array(buffer);
|
|
@@ -139,7 +133,7 @@ export function initWasm(wasmModule: EmscriptenModule): void {
|
|
|
139
133
|
interface TypeInfo {
|
|
140
134
|
dtype: DType;
|
|
141
135
|
bytesPerElement: number;
|
|
142
|
-
heapView: '
|
|
136
|
+
heapView: 'HEAP32' | 'HEAPU8' | 'HEAPU32' | 'HEAPF32' | 'HEAPF64';
|
|
143
137
|
resultType: 'f32' | 'f64' | 'i32' | 'u32';
|
|
144
138
|
}
|
|
145
139
|
|
|
@@ -152,7 +146,7 @@ function detectType(arr: any): TypeInfo {
|
|
|
152
146
|
} else if (arr instanceof Float32Array) {
|
|
153
147
|
return { dtype: DType.F32, bytesPerElement: 4, heapView: 'HEAPF32', resultType: 'f64' };
|
|
154
148
|
} else if (arr instanceof Int8Array) {
|
|
155
|
-
return { dtype: DType.I8, bytesPerElement: 1, heapView: '
|
|
149
|
+
return { dtype: DType.I8, bytesPerElement: 1, heapView: 'HEAPU8', resultType: 'i32' };
|
|
156
150
|
} else if (arr instanceof Uint8Array) {
|
|
157
151
|
return { dtype: DType.U8, bytesPerElement: 1, heapView: 'HEAPU8', resultType: 'u32' };
|
|
158
152
|
}
|
|
@@ -161,9 +155,9 @@ function detectType(arr: any): TypeInfo {
|
|
|
161
155
|
const constructorName = arr.constructor.name;
|
|
162
156
|
|
|
163
157
|
if (constructorName === 'Float16Array') {
|
|
164
|
-
return { dtype: DType.F16, bytesPerElement: 2, heapView: '
|
|
158
|
+
return { dtype: DType.F16, bytesPerElement: 2, heapView: 'HEAPU8', resultType: 'f32' };
|
|
165
159
|
} else if (constructorName === 'BFloat16Array') {
|
|
166
|
-
return { dtype: DType.BF16, bytesPerElement: 2, heapView: '
|
|
160
|
+
return { dtype: DType.BF16, bytesPerElement: 2, heapView: 'HEAPU8', resultType: 'f32' };
|
|
167
161
|
} else if (constructorName === 'E4M3Array') {
|
|
168
162
|
throw new Error('E4M3 not yet supported in WASM backend');
|
|
169
163
|
} else if (constructorName === 'E5M2Array') {
|
|
@@ -182,9 +176,9 @@ function typeInfoFromDtype(dtype: DType): TypeInfo {
|
|
|
182
176
|
switch (dtype) {
|
|
183
177
|
case DType.F64: return { dtype, bytesPerElement: 8, heapView: 'HEAPF64', resultType: 'f64' };
|
|
184
178
|
case DType.F32: return { dtype, bytesPerElement: 4, heapView: 'HEAPF32', resultType: 'f64' };
|
|
185
|
-
case DType.F16: return { dtype, bytesPerElement: 2, heapView: '
|
|
186
|
-
case DType.BF16: return { dtype, bytesPerElement: 2, heapView: '
|
|
187
|
-
case DType.I8: return { dtype, bytesPerElement: 1, heapView: '
|
|
179
|
+
case DType.F16: return { dtype, bytesPerElement: 2, heapView: 'HEAPU8', resultType: 'f32' };
|
|
180
|
+
case DType.BF16: return { dtype, bytesPerElement: 2, heapView: 'HEAPU8', resultType: 'f32' };
|
|
181
|
+
case DType.I8: return { dtype, bytesPerElement: 1, heapView: 'HEAPU8', resultType: 'i32' };
|
|
188
182
|
case DType.U8: return { dtype, bytesPerElement: 1, heapView: 'HEAPU8', resultType: 'u32' };
|
|
189
183
|
case DType.U1: return { dtype, bytesPerElement: 1, heapView: 'HEAPU8', resultType: 'u32' };
|
|
190
184
|
default: throw new Error(`Unsupported dtype: ${dtype}`);
|
package/javascript/numkong.c
CHANGED
|
@@ -65,7 +65,7 @@ static int is_compatible_napi_type(napi_typedarray_type napi_type, nk_dtype_t dt
|
|
|
65
65
|
* @param out_dtype The dtype of the value stored in the buffer.
|
|
66
66
|
* @return napi_value containing the result as a JavaScript Number, or NULL on error.
|
|
67
67
|
*/
|
|
68
|
-
static napi_value
|
|
68
|
+
static napi_value nk_scalar_buffer_to_js_number(napi_env env, nk_scalar_buffer_t const *result, nk_dtype_t out_dtype) {
|
|
69
69
|
// i64/u64 must return BigInt since they may exceed Number.MAX_SAFE_INTEGER
|
|
70
70
|
if (out_dtype == nk_i64_k) {
|
|
71
71
|
napi_value js_result;
|
|
@@ -77,30 +77,9 @@ static napi_value scalar_to_js_number(napi_env env, nk_scalar_buffer_t const *re
|
|
|
77
77
|
if (napi_create_bigint_uint64(env, result->u64, &js_result) != napi_ok) return NULL;
|
|
78
78
|
return js_result;
|
|
79
79
|
}
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
case nk_f32_k: result_f64 = (double)result->f32; break;
|
|
84
|
-
case nk_f16_k: {
|
|
85
|
-
nk_f32_t t;
|
|
86
|
-
nk_f16_to_f32(&result->f16, &t);
|
|
87
|
-
result_f64 = (double)t;
|
|
88
|
-
break;
|
|
89
|
-
}
|
|
90
|
-
case nk_bf16_k: {
|
|
91
|
-
nk_f32_t t;
|
|
92
|
-
nk_bf16_to_f32(&result->bf16, &t);
|
|
93
|
-
result_f64 = (double)t;
|
|
94
|
-
break;
|
|
95
|
-
}
|
|
96
|
-
case nk_i8_k: result_f64 = (double)result->i8; break;
|
|
97
|
-
case nk_u8_k: result_f64 = (double)result->u8; break;
|
|
98
|
-
case nk_i16_k: result_f64 = (double)result->i16; break;
|
|
99
|
-
case nk_u16_k: result_f64 = (double)result->u16; break;
|
|
100
|
-
case nk_i32_k: result_f64 = (double)result->i32; break;
|
|
101
|
-
case nk_u32_k: result_f64 = (double)result->u32; break;
|
|
102
|
-
default: napi_throw_error(env, NULL, "Unexpected output dtype in result conversion"); return NULL;
|
|
103
|
-
}
|
|
80
|
+
nk_f64c_t result_c;
|
|
81
|
+
nk_scalar_buffer_to_f64c(result, out_dtype, &result_c);
|
|
82
|
+
double result_f64 = result_c.real;
|
|
104
83
|
napi_value js_result;
|
|
105
84
|
if (napi_create_double(env, result_f64, &js_result) != napi_ok) return NULL;
|
|
106
85
|
return js_result;
|
|
@@ -209,7 +188,7 @@ static napi_value dense(napi_env env, napi_callback_info info, nk_kernel_kind_t
|
|
|
209
188
|
nk_scalar_buffer_t result;
|
|
210
189
|
metric(data_a, data_b, dimensions, &result);
|
|
211
190
|
|
|
212
|
-
return
|
|
191
|
+
return nk_scalar_buffer_to_js_number(env, &result, out_dtype);
|
|
213
192
|
}
|
|
214
193
|
|
|
215
194
|
/** @brief N-API entry for inner product (dot). */
|
package/javascript/numkong.ts
CHANGED
|
@@ -26,18 +26,30 @@
|
|
|
26
26
|
*/
|
|
27
27
|
|
|
28
28
|
import build from "node-gyp-build";
|
|
29
|
+
import { createRequire } from "node:module";
|
|
29
30
|
import * as path from "node:path";
|
|
30
31
|
import { existsSync } from "node:fs";
|
|
31
32
|
import { getFileName, getRoot } from "bindings";
|
|
32
33
|
import { setConversionFunctions, Float16Array, BFloat16Array, E4M3Array, E5M2Array, BinaryArray, TensorBase, VectorBase, VectorView, Vector, MatrixBase, Matrix, PackedMatrix, DType, dtypeToString, outputDtype, KernelFamily } from "./types.js";
|
|
33
34
|
|
|
34
|
-
|
|
35
|
+
function loadNativeAddon(): any {
|
|
36
|
+
// Tier 1: platform-specific optional dependency (@numkong/<os>-<arch>)
|
|
37
|
+
try {
|
|
38
|
+
const req = createRequire(path.join(getDirName(), "noop.js"));
|
|
39
|
+
return req(`@numkong/${process.platform}-${process.arch}`);
|
|
40
|
+
} catch { }
|
|
41
|
+
|
|
42
|
+
// Tier 2: node-gyp-build fallback (local dev, unsupported platform, build-from-source)
|
|
43
|
+
try {
|
|
44
|
+
return build(getBuildDir(getDirName()));
|
|
45
|
+
} catch { }
|
|
35
46
|
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
compiled = build(builddir);
|
|
47
|
+
return null;
|
|
48
|
+
}
|
|
39
49
|
|
|
40
|
-
|
|
50
|
+
let compiled: any = loadNativeAddon();
|
|
51
|
+
|
|
52
|
+
if (compiled) {
|
|
41
53
|
setConversionFunctions({
|
|
42
54
|
castF16ToF32: compiled.castF16ToF32,
|
|
43
55
|
castF32ToF16: compiled.castF32ToF16,
|
|
@@ -49,12 +61,11 @@ try {
|
|
|
49
61
|
castF32ToE5M2: compiled.castF32ToE5M2,
|
|
50
62
|
cast: compiled.cast,
|
|
51
63
|
});
|
|
52
|
-
}
|
|
53
|
-
// Native addon not available
|
|
54
|
-
// For WASM usage, import the Emscripten module directly (see test/test-wasm.mjs)
|
|
64
|
+
} else {
|
|
55
65
|
throw new Error(
|
|
56
|
-
"NumKong native addon not found.
|
|
57
|
-
"
|
|
66
|
+
"NumKong native addon not found. Install with `npm install numkong` (which fetches " +
|
|
67
|
+
"the prebuilt binary), or build from source with `npm run install`. " +
|
|
68
|
+
"For WASM, import from 'numkong/wasm' instead."
|
|
58
69
|
);
|
|
59
70
|
}
|
|
60
71
|
|
|
@@ -76,7 +87,7 @@ export const Capability = {
|
|
|
76
87
|
SVE: 1n << 10n, // 2020: ARM SVE
|
|
77
88
|
SVEHALF: 1n << 11n, // 2020: ARM SVE FP16
|
|
78
89
|
SVESDOT: 1n << 12n, // 2020: ARM SVE i8 dot
|
|
79
|
-
|
|
90
|
+
ALDER: 1n << 13n, // 2021: Intel AVX2+VNNI
|
|
80
91
|
SVEBFDOT: 1n << 14n, // 2021: ARM SVE BF16
|
|
81
92
|
SVE2: 1n << 15n, // 2022: ARM SVE2
|
|
82
93
|
V128RELAXED: 1n << 16n, // 2022: WASM Relaxed SIMD
|
|
@@ -97,6 +108,12 @@ export const Capability = {
|
|
|
97
108
|
SMEBF16: 1n << 31n, // 2025+: ARM SME B16B16
|
|
98
109
|
SMELUT2: 1n << 32n, // 2025+: ARM SME LUTv2
|
|
99
110
|
RVVBB: 1n << 33n, // 2025+: RISC-V Zvbb
|
|
111
|
+
SIERRA: 1n << 34n, // 2024: Intel AVXVNNIINT8
|
|
112
|
+
SMEBI32: 1n << 35n, // 2025+: ARM SME BI32I32
|
|
113
|
+
LOONGSONASX: 1n << 36n, // LoongArch LASX 256-bit SIMD
|
|
114
|
+
POWERVSX: 1n << 37n, // Power VSX 128-bit SIMD
|
|
115
|
+
DIAMOND: 1n << 38n, // 2025+: Intel AVX10.2
|
|
116
|
+
NEONFP8: 1n << 39n, // ARM NEON FP8
|
|
100
117
|
} as const;
|
|
101
118
|
|
|
102
119
|
export { Float16Array, BFloat16Array, E4M3Array, E5M2Array, BinaryArray, TensorBase, VectorBase, VectorView, Vector, MatrixBase, Matrix, PackedMatrix, outputDtype };
|
|
@@ -571,5 +588,13 @@ function getDirName() {
|
|
|
571
588
|
try {
|
|
572
589
|
if (__dirname) return __dirname;
|
|
573
590
|
} catch (e) { }
|
|
591
|
+
// Fall back to cwd, which is typically the project root in dev and CI.
|
|
592
|
+
// This helps runtimes like Deno and Bun where the `bindings` module's
|
|
593
|
+
// V8 stack-trace hack may not resolve correctly.
|
|
594
|
+
try {
|
|
595
|
+
const cwd = process.cwd();
|
|
596
|
+
if (existsSync(path.join(cwd, "build")) || existsSync(path.join(cwd, "prebuilds")))
|
|
597
|
+
return cwd;
|
|
598
|
+
} catch (e) { }
|
|
574
599
|
return getRoot(getFileName());
|
|
575
600
|
}
|