numkong 7.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +201 -0
- package/README.md +495 -0
- package/binding.gyp +540 -0
- package/c/dispatch.h +512 -0
- package/c/dispatch_bf16.c +389 -0
- package/c/dispatch_bf16c.c +52 -0
- package/c/dispatch_e2m3.c +263 -0
- package/c/dispatch_e3m2.c +243 -0
- package/c/dispatch_e4m3.c +276 -0
- package/c/dispatch_e5m2.c +272 -0
- package/c/dispatch_f16.c +376 -0
- package/c/dispatch_f16c.c +58 -0
- package/c/dispatch_f32.c +378 -0
- package/c/dispatch_f32c.c +99 -0
- package/c/dispatch_f64.c +296 -0
- package/c/dispatch_f64c.c +98 -0
- package/c/dispatch_i16.c +96 -0
- package/c/dispatch_i32.c +89 -0
- package/c/dispatch_i4.c +150 -0
- package/c/dispatch_i64.c +86 -0
- package/c/dispatch_i8.c +289 -0
- package/c/dispatch_other.c +330 -0
- package/c/dispatch_u1.c +148 -0
- package/c/dispatch_u16.c +124 -0
- package/c/dispatch_u32.c +118 -0
- package/c/dispatch_u4.c +150 -0
- package/c/dispatch_u64.c +102 -0
- package/c/dispatch_u8.c +303 -0
- package/c/numkong.c +950 -0
- package/include/README.md +573 -0
- package/include/module.modulemap +129 -0
- package/include/numkong/attention/sapphireamx.h +1361 -0
- package/include/numkong/attention/sme.h +2066 -0
- package/include/numkong/attention.h +49 -0
- package/include/numkong/capabilities.h +748 -0
- package/include/numkong/cast/README.md +262 -0
- package/include/numkong/cast/haswell.h +975 -0
- package/include/numkong/cast/icelake.h +470 -0
- package/include/numkong/cast/neon.h +1192 -0
- package/include/numkong/cast/rvv.h +1021 -0
- package/include/numkong/cast/sapphire.h +262 -0
- package/include/numkong/cast/serial.h +2262 -0
- package/include/numkong/cast/skylake.h +856 -0
- package/include/numkong/cast/v128relaxed.h +180 -0
- package/include/numkong/cast.h +230 -0
- package/include/numkong/curved/README.md +223 -0
- package/include/numkong/curved/genoa.h +182 -0
- package/include/numkong/curved/haswell.h +276 -0
- package/include/numkong/curved/neon.h +205 -0
- package/include/numkong/curved/neonbfdot.h +212 -0
- package/include/numkong/curved/neonhalf.h +212 -0
- package/include/numkong/curved/rvv.h +305 -0
- package/include/numkong/curved/serial.h +207 -0
- package/include/numkong/curved/skylake.h +457 -0
- package/include/numkong/curved/smef64.h +506 -0
- package/include/numkong/curved.h +517 -0
- package/include/numkong/curved.hpp +144 -0
- package/include/numkong/dot/README.md +425 -0
- package/include/numkong/dot/alder.h +563 -0
- package/include/numkong/dot/genoa.h +315 -0
- package/include/numkong/dot/haswell.h +1688 -0
- package/include/numkong/dot/icelake.h +883 -0
- package/include/numkong/dot/neon.h +818 -0
- package/include/numkong/dot/neonbfdot.h +244 -0
- package/include/numkong/dot/neonfhm.h +360 -0
- package/include/numkong/dot/neonhalf.h +198 -0
- package/include/numkong/dot/neonsdot.h +508 -0
- package/include/numkong/dot/rvv.h +714 -0
- package/include/numkong/dot/rvvbb.h +72 -0
- package/include/numkong/dot/rvvbf16.h +123 -0
- package/include/numkong/dot/rvvhalf.h +129 -0
- package/include/numkong/dot/sapphire.h +141 -0
- package/include/numkong/dot/serial.h +838 -0
- package/include/numkong/dot/sierra.h +405 -0
- package/include/numkong/dot/skylake.h +1084 -0
- package/include/numkong/dot/sve.h +379 -0
- package/include/numkong/dot/svebfdot.h +74 -0
- package/include/numkong/dot/svehalf.h +123 -0
- package/include/numkong/dot/v128relaxed.h +1258 -0
- package/include/numkong/dot.h +1070 -0
- package/include/numkong/dot.hpp +94 -0
- package/include/numkong/dots/README.md +496 -0
- package/include/numkong/dots/alder.h +114 -0
- package/include/numkong/dots/genoa.h +94 -0
- package/include/numkong/dots/haswell.h +295 -0
- package/include/numkong/dots/icelake.h +171 -0
- package/include/numkong/dots/neon.h +120 -0
- package/include/numkong/dots/neonbfdot.h +58 -0
- package/include/numkong/dots/neonfhm.h +94 -0
- package/include/numkong/dots/neonhalf.h +57 -0
- package/include/numkong/dots/neonsdot.h +108 -0
- package/include/numkong/dots/rvv.h +2486 -0
- package/include/numkong/dots/sapphireamx.h +3973 -0
- package/include/numkong/dots/serial.h +2844 -0
- package/include/numkong/dots/sierra.h +97 -0
- package/include/numkong/dots/skylake.h +196 -0
- package/include/numkong/dots/sme.h +5372 -0
- package/include/numkong/dots/smebi32.h +461 -0
- package/include/numkong/dots/smef64.h +1318 -0
- package/include/numkong/dots/smehalf.h +47 -0
- package/include/numkong/dots/v128relaxed.h +294 -0
- package/include/numkong/dots.h +2804 -0
- package/include/numkong/dots.hpp +639 -0
- package/include/numkong/each/README.md +469 -0
- package/include/numkong/each/haswell.h +1658 -0
- package/include/numkong/each/icelake.h +272 -0
- package/include/numkong/each/neon.h +1104 -0
- package/include/numkong/each/neonbfdot.h +212 -0
- package/include/numkong/each/neonhalf.h +410 -0
- package/include/numkong/each/rvv.h +1121 -0
- package/include/numkong/each/sapphire.h +477 -0
- package/include/numkong/each/serial.h +260 -0
- package/include/numkong/each/skylake.h +1562 -0
- package/include/numkong/each.h +2146 -0
- package/include/numkong/each.hpp +434 -0
- package/include/numkong/geospatial/README.md +147 -0
- package/include/numkong/geospatial/haswell.h +593 -0
- package/include/numkong/geospatial/neon.h +571 -0
- package/include/numkong/geospatial/rvv.h +701 -0
- package/include/numkong/geospatial/serial.h +309 -0
- package/include/numkong/geospatial/skylake.h +577 -0
- package/include/numkong/geospatial/v128relaxed.h +613 -0
- package/include/numkong/geospatial.h +453 -0
- package/include/numkong/geospatial.hpp +235 -0
- package/include/numkong/matrix.hpp +336 -0
- package/include/numkong/maxsim/README.md +187 -0
- package/include/numkong/maxsim/alder.h +511 -0
- package/include/numkong/maxsim/genoa.h +115 -0
- package/include/numkong/maxsim/haswell.h +553 -0
- package/include/numkong/maxsim/icelake.h +480 -0
- package/include/numkong/maxsim/neonsdot.h +394 -0
- package/include/numkong/maxsim/sapphireamx.h +877 -0
- package/include/numkong/maxsim/serial.h +490 -0
- package/include/numkong/maxsim/sme.h +929 -0
- package/include/numkong/maxsim/v128relaxed.h +280 -0
- package/include/numkong/maxsim.h +571 -0
- package/include/numkong/maxsim.hpp +133 -0
- package/include/numkong/mesh/README.md +227 -0
- package/include/numkong/mesh/haswell.h +2235 -0
- package/include/numkong/mesh/neon.h +1329 -0
- package/include/numkong/mesh/neonbfdot.h +842 -0
- package/include/numkong/mesh/neonhalf.h +616 -0
- package/include/numkong/mesh/rvv.h +916 -0
- package/include/numkong/mesh/serial.h +742 -0
- package/include/numkong/mesh/skylake.h +1135 -0
- package/include/numkong/mesh/v128relaxed.h +1052 -0
- package/include/numkong/mesh.h +652 -0
- package/include/numkong/mesh.hpp +762 -0
- package/include/numkong/numkong.h +78 -0
- package/include/numkong/numkong.hpp +57 -0
- package/include/numkong/probability/README.md +173 -0
- package/include/numkong/probability/haswell.h +267 -0
- package/include/numkong/probability/neon.h +225 -0
- package/include/numkong/probability/rvv.h +409 -0
- package/include/numkong/probability/serial.h +169 -0
- package/include/numkong/probability/skylake.h +324 -0
- package/include/numkong/probability.h +383 -0
- package/include/numkong/probability.hpp +120 -0
- package/include/numkong/random.h +50 -0
- package/include/numkong/random.hpp +285 -0
- package/include/numkong/reduce/README.md +547 -0
- package/include/numkong/reduce/alder.h +632 -0
- package/include/numkong/reduce/genoa.h +201 -0
- package/include/numkong/reduce/haswell.h +3783 -0
- package/include/numkong/reduce/icelake.h +549 -0
- package/include/numkong/reduce/neon.h +3841 -0
- package/include/numkong/reduce/neonbfdot.h +353 -0
- package/include/numkong/reduce/neonfhm.h +665 -0
- package/include/numkong/reduce/neonhalf.h +157 -0
- package/include/numkong/reduce/neonsdot.h +357 -0
- package/include/numkong/reduce/rvv.h +3407 -0
- package/include/numkong/reduce/serial.h +757 -0
- package/include/numkong/reduce/sierra.h +338 -0
- package/include/numkong/reduce/skylake.h +3792 -0
- package/include/numkong/reduce/v128relaxed.h +2302 -0
- package/include/numkong/reduce.h +1597 -0
- package/include/numkong/reduce.hpp +633 -0
- package/include/numkong/scalar/README.md +89 -0
- package/include/numkong/scalar/haswell.h +113 -0
- package/include/numkong/scalar/neon.h +122 -0
- package/include/numkong/scalar/neonhalf.h +70 -0
- package/include/numkong/scalar/rvv.h +211 -0
- package/include/numkong/scalar/sapphire.h +63 -0
- package/include/numkong/scalar/serial.h +332 -0
- package/include/numkong/scalar/v128relaxed.h +56 -0
- package/include/numkong/scalar.h +683 -0
- package/include/numkong/set/README.md +179 -0
- package/include/numkong/set/haswell.h +334 -0
- package/include/numkong/set/icelake.h +485 -0
- package/include/numkong/set/neon.h +364 -0
- package/include/numkong/set/rvv.h +226 -0
- package/include/numkong/set/rvvbb.h +117 -0
- package/include/numkong/set/serial.h +174 -0
- package/include/numkong/set/sve.h +185 -0
- package/include/numkong/set/v128relaxed.h +240 -0
- package/include/numkong/set.h +457 -0
- package/include/numkong/set.hpp +114 -0
- package/include/numkong/sets/README.md +149 -0
- package/include/numkong/sets/haswell.h +63 -0
- package/include/numkong/sets/icelake.h +66 -0
- package/include/numkong/sets/neon.h +61 -0
- package/include/numkong/sets/serial.h +43 -0
- package/include/numkong/sets/smebi32.h +1099 -0
- package/include/numkong/sets/v128relaxed.h +58 -0
- package/include/numkong/sets.h +339 -0
- package/include/numkong/sparse/README.md +156 -0
- package/include/numkong/sparse/icelake.h +463 -0
- package/include/numkong/sparse/neon.h +288 -0
- package/include/numkong/sparse/serial.h +117 -0
- package/include/numkong/sparse/sve2.h +507 -0
- package/include/numkong/sparse/turin.h +322 -0
- package/include/numkong/sparse.h +363 -0
- package/include/numkong/sparse.hpp +113 -0
- package/include/numkong/spatial/README.md +435 -0
- package/include/numkong/spatial/alder.h +607 -0
- package/include/numkong/spatial/genoa.h +290 -0
- package/include/numkong/spatial/haswell.h +960 -0
- package/include/numkong/spatial/icelake.h +586 -0
- package/include/numkong/spatial/neon.h +773 -0
- package/include/numkong/spatial/neonbfdot.h +165 -0
- package/include/numkong/spatial/neonhalf.h +118 -0
- package/include/numkong/spatial/neonsdot.h +261 -0
- package/include/numkong/spatial/rvv.h +984 -0
- package/include/numkong/spatial/rvvbf16.h +123 -0
- package/include/numkong/spatial/rvvhalf.h +117 -0
- package/include/numkong/spatial/sapphire.h +343 -0
- package/include/numkong/spatial/serial.h +346 -0
- package/include/numkong/spatial/sierra.h +323 -0
- package/include/numkong/spatial/skylake.h +606 -0
- package/include/numkong/spatial/sve.h +224 -0
- package/include/numkong/spatial/svebfdot.h +122 -0
- package/include/numkong/spatial/svehalf.h +109 -0
- package/include/numkong/spatial/v128relaxed.h +717 -0
- package/include/numkong/spatial.h +1425 -0
- package/include/numkong/spatial.hpp +183 -0
- package/include/numkong/spatials/README.md +580 -0
- package/include/numkong/spatials/alder.h +94 -0
- package/include/numkong/spatials/genoa.h +94 -0
- package/include/numkong/spatials/haswell.h +219 -0
- package/include/numkong/spatials/icelake.h +113 -0
- package/include/numkong/spatials/neon.h +109 -0
- package/include/numkong/spatials/neonbfdot.h +60 -0
- package/include/numkong/spatials/neonfhm.h +92 -0
- package/include/numkong/spatials/neonhalf.h +58 -0
- package/include/numkong/spatials/neonsdot.h +109 -0
- package/include/numkong/spatials/rvv.h +1960 -0
- package/include/numkong/spatials/sapphireamx.h +1149 -0
- package/include/numkong/spatials/serial.h +226 -0
- package/include/numkong/spatials/sierra.h +96 -0
- package/include/numkong/spatials/skylake.h +184 -0
- package/include/numkong/spatials/sme.h +1901 -0
- package/include/numkong/spatials/smef64.h +465 -0
- package/include/numkong/spatials/v128relaxed.h +240 -0
- package/include/numkong/spatials.h +3021 -0
- package/include/numkong/spatials.hpp +508 -0
- package/include/numkong/tensor.hpp +1592 -0
- package/include/numkong/trigonometry/README.md +184 -0
- package/include/numkong/trigonometry/haswell.h +652 -0
- package/include/numkong/trigonometry/neon.h +639 -0
- package/include/numkong/trigonometry/rvv.h +699 -0
- package/include/numkong/trigonometry/serial.h +703 -0
- package/include/numkong/trigonometry/skylake.h +721 -0
- package/include/numkong/trigonometry/v128relaxed.h +666 -0
- package/include/numkong/trigonometry.h +467 -0
- package/include/numkong/trigonometry.hpp +166 -0
- package/include/numkong/types.h +1384 -0
- package/include/numkong/types.hpp +5603 -0
- package/include/numkong/vector.hpp +698 -0
- package/javascript/README.md +246 -0
- package/javascript/dist/cjs/numkong-wasm.d.ts +166 -0
- package/javascript/dist/cjs/numkong-wasm.js +617 -0
- package/javascript/dist/cjs/numkong.d.ts +343 -0
- package/javascript/dist/cjs/numkong.js +523 -0
- package/javascript/dist/cjs/package.json +3 -0
- package/javascript/dist/cjs/types.d.ts +284 -0
- package/javascript/dist/cjs/types.js +653 -0
- package/javascript/dist/esm/numkong-wasm.d.ts +166 -0
- package/javascript/dist/esm/numkong-wasm.js +595 -0
- package/javascript/dist/esm/numkong.d.ts +343 -0
- package/javascript/dist/esm/numkong.js +452 -0
- package/javascript/dist/esm/package.json +3 -0
- package/javascript/dist/esm/types.d.ts +284 -0
- package/javascript/dist/esm/types.js +630 -0
- package/javascript/dist-package-cjs.json +3 -0
- package/javascript/dist-package-esm.json +3 -0
- package/javascript/node-gyp-build.d.ts +1 -0
- package/javascript/numkong-wasm.ts +756 -0
- package/javascript/numkong.c +689 -0
- package/javascript/numkong.ts +575 -0
- package/javascript/tsconfig-base.json +39 -0
- package/javascript/tsconfig-cjs.json +8 -0
- package/javascript/tsconfig-esm.json +8 -0
- package/javascript/types.ts +674 -0
- package/package.json +87 -0
|
@@ -0,0 +1,452 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @fileoverview NumKong - Portable mixed-precision BLAS-like vector math library
|
|
3
|
+
*
|
|
4
|
+
* NumKong provides SIMD-accelerated distance metrics and vector operations for
|
|
5
|
+
* x86, ARM, RISC-V, and WASM platforms. The library automatically detects and uses
|
|
6
|
+
* the best available SIMD instruction set at runtime.
|
|
7
|
+
*
|
|
8
|
+
* @module numkong
|
|
9
|
+
* @author Ash Vardanian
|
|
10
|
+
*
|
|
11
|
+
* @example
|
|
12
|
+
* ```typescript
|
|
13
|
+
* import { dot, euclidean, Float16Array, DType } from 'numkong';
|
|
14
|
+
*
|
|
15
|
+
* // Auto-detected types
|
|
16
|
+
* const a = new Float32Array([1, 2, 3]);
|
|
17
|
+
* const b = new Float32Array([4, 5, 6]);
|
|
18
|
+
* dot(a, b); // 32
|
|
19
|
+
* euclidean(a, b); // 5.196...
|
|
20
|
+
*
|
|
21
|
+
* // Custom types with explicit dtype
|
|
22
|
+
* const c = new Float16Array([1, 2, 3]);
|
|
23
|
+
* const d = new Float16Array([4, 5, 6]);
|
|
24
|
+
* dot(c, d, DType.F16); // 32
|
|
25
|
+
* ```
|
|
26
|
+
*/
|
|
27
|
+
import build from "node-gyp-build";
|
|
28
|
+
import * as path from "node:path";
|
|
29
|
+
import { existsSync } from "node:fs";
|
|
30
|
+
import { getFileName, getRoot } from "bindings";
|
|
31
|
+
import { setConversionFunctions, Float16Array, BFloat16Array, E4M3Array, E5M2Array, BinaryArray, TensorBase, VectorBase, VectorView, Vector, MatrixBase, Matrix, PackedMatrix, DType, dtypeToString, outputDtype } from "./types.js";
|
|
32
|
+
// Handle to the native N-API addon; the exported kernels in this module dispatch through it.
let compiled;
try {
    const builddir = getBuildDir(getDirName());
    compiled = build(builddir);
    // Initialize conversion functions for types.ts
    setConversionFunctions({
        castF16ToF32: compiled.castF16ToF32,
        castF32ToF16: compiled.castF32ToF16,
        castBF16ToF32: compiled.castBF16ToF32,
        castF32ToBF16: compiled.castF32ToBF16,
        castE4M3ToF32: compiled.castE4M3ToF32,
        castF32ToE4M3: compiled.castF32ToE4M3,
        castE5M2ToF32: compiled.castE5M2ToF32,
        castF32ToE5M2: compiled.castF32ToE5M2,
        cast: compiled.cast,
    });
}
catch (e) {
    // Native addon not available
    // For WASM usage, import the Emscripten module directly (see test/test-wasm.mjs)
    // The original failure is attached as `cause`: anything thrown inside the `try`
    // (locating the build directory, loading the binary, wiring the conversion
    // functions) ends up here, and the generic message alone cannot tell them apart.
    throw new Error("NumKong native addon not found. Build with `npm run build` or use WASM " +
        "by importing the Emscripten module directly. See test/test-wasm.mjs for examples.", { cause: e });
}
|
|
55
|
+
/**
 * Builds the single-bit BigInt mask for a capability at the given bit position.
 */
const capabilityBit = (position) => 1n << BigInt(position);
/**
 * CPU capability bit masks, listed chronologically by first commercial silicon.
 * Combine with getCapabilities() / hasCapability() to test for a specific SIMD extension.
 */
export const Capability = {
    SERIAL: capabilityBit(0), // Always: Fallback
    NEON: capabilityBit(1), // 2013: ARM NEON
    HASWELL: capabilityBit(2), // 2013: Intel AVX2
    SKYLAKE: capabilityBit(3), // 2017: Intel AVX-512
    NEONHALF: capabilityBit(4), // 2017: ARM NEON FP16
    NEONSDOT: capabilityBit(5), // 2017: ARM NEON i8 dot
    NEONFHM: capabilityBit(6), // 2018: ARM NEON FP16 FML
    ICELAKE: capabilityBit(7), // 2019: Intel AVX-512 VNNI
    GENOA: capabilityBit(8), // 2020: Intel/AMD AVX-512 BF16
    NEONBFDOT: capabilityBit(9), // 2020: ARM NEON BF16
    SVE: capabilityBit(10), // 2020: ARM SVE
    SVEHALF: capabilityBit(11), // 2020: ARM SVE FP16
    SVESDOT: capabilityBit(12), // 2020: ARM SVE i8 dot
    SIERRA: capabilityBit(13), // 2021: Intel AVX2+VNNI
    SVEBFDOT: capabilityBit(14), // 2021: ARM SVE BF16
    SVE2: capabilityBit(15), // 2022: ARM SVE2
    V128RELAXED: capabilityBit(16), // 2022: WASM Relaxed SIMD
    SAPPHIRE: capabilityBit(17), // 2023: Intel AVX-512 FP16
    SAPPHIREAMX: capabilityBit(18), // 2023: Intel Sapphire AMX
    RVV: capabilityBit(19), // 2023: RISC-V Vector
    RVVHALF: capabilityBit(20), // 2023: RISC-V Zvfh
    RVVBF16: capabilityBit(21), // 2023: RISC-V Zvfbfwma
    GRANITEAMX: capabilityBit(22), // 2024: Intel Granite AMX FP16
    TURIN: capabilityBit(23), // 2024: AMD Turin AVX-512 CD
    SME: capabilityBit(24), // 2024: ARM SME
    SME2: capabilityBit(25), // 2024: ARM SME2
    SMEF64: capabilityBit(26), // 2024: ARM SME F64
    SMEFA64: capabilityBit(27), // 2024: ARM SME FA64
    SVE2P1: capabilityBit(28), // 2025+: ARM SVE2.1
    SME2P1: capabilityBit(29), // 2025+: ARM SME2.1
    SMEHALF: capabilityBit(30), // 2025+: ARM SME F16F16
    SMEBF16: capabilityBit(31), // 2025+: ARM SME B16B16
    SMELUT2: capabilityBit(32), // 2025+: ARM SME LUTv2
    RVVBB: capabilityBit(33), // 2025+: RISC-V Zvbb
};
|
|
95
|
+
export { Float16Array, BFloat16Array, E4M3Array, E5M2Array, BinaryArray, TensorBase, VectorBase, VectorView, Vector, MatrixBase, Matrix, PackedMatrix, outputDtype };
|
|
96
|
+
/**
 * Scalar and bulk conversion helpers, re-exported directly from the native addon.
 */
export const {
    /** Convert a single FP16 value (as uint16 bits) to FP32 */
    castF16ToF32,
    /** Convert a single FP32 value to FP16 (returns uint16 bits) */
    castF32ToF16,
    /** Convert a single BF16 value (as uint16 bits) to FP32 */
    castBF16ToF32,
    /** Convert a single FP32 value to BF16 (returns uint16 bits) */
    castF32ToBF16,
    /** Convert a single E4M3 value (as uint8 bits) to FP32 */
    castE4M3ToF32,
    /** Convert a single FP32 value to E4M3 (returns uint8 bits) */
    castF32ToE4M3,
    /** Convert a single E5M2 value (as uint8 bits) to FP32 */
    castE5M2ToF32,
    /** Convert a single FP32 value to E5M2 (returns uint8 bits) */
    castF32ToE5M2,
    /** Bulk conversion between different numeric types (modifies destination array in-place) */
    cast,
} = compiled;
|
|
114
|
+
export { DType };
|
|
115
|
+
/**
 * Produce a TypedArray view over a TensorBase's storage for the N-API backend.
 *
 * The view is created over the tensor's existing `buffer` at `byteOffset` with `length`
 * elements, so no data is copied. F16 and BF16 are exposed as raw 16-bit words; every
 * dtype without a dedicated view (including U8 and U1) is exposed as bytes.
 *
 * @param input - Object exposing `buffer`, `byteOffset`, `length` and `dtype`.
 * @returns `{ arr, dtype }` where `arr` is the view and `dtype` is the input's dtype.
 */
function unwrapTensor(input) {
    const { dtype, buffer, byteOffset, length } = input;
    let ViewType;
    if (dtype === DType.F64) {
        ViewType = Float64Array;
    }
    else if (dtype === DType.F32) {
        ViewType = Float32Array;
    }
    else if (dtype === DType.F16 || dtype === DType.BF16) {
        ViewType = Uint16Array;
    }
    else if (dtype === DType.I8) {
        ViewType = Int8Array;
    }
    else {
        // U8, U1 and any dtype without a dedicated view share the byte-wise fallback.
        ViewType = Uint8Array;
    }
    return { arr: new ViewType(buffer, byteOffset, length), dtype };
}
|
|
133
|
+
/**
 * Reports the SIMD capabilities detected at runtime, as a bitmask.
 *
 * Individual bits correspond to the entries of `Capability` (AVX2, AVX-512, ARM NEON,
 * ARM SVE, ARM SME, RISC-V Vector, WASM SIMD extensions, ...). The value is obtained
 * from the native addon on every call.
 *
 * @returns {bigint} Bitmask of capability flags (test against Capability constants)
 *
 * @example
 * ```ts
 * import { getCapabilities, Capability } from 'numkong';
 *
 * const caps = getCapabilities();
 * console.log(`Capabilities: 0x${caps.toString(16)}`);
 *
 * // Check for specific SIMD support
 * if (caps & Capability.HASWELL) {
 *   console.log('AVX2 available');
 * }
 * ```
 */
export const getCapabilities = () => compiled.getCapabilities();
|
|
158
|
+
/**
 * Tests whether a given SIMD capability is present at runtime.
 *
 * Convenience wrapper: fetches the mask from getCapabilities() and ANDs it with `cap`.
 * The result is true when the two masks share at least one set bit.
 *
 * @param {bigint} cap - Capability flag to check (from Capability constants)
 * @returns {boolean} True if the capability is available, false otherwise
 *
 * @example
 * ```ts
 * import { hasCapability, Capability } from 'numkong';
 *
 * if (hasCapability(Capability.HASWELL)) {
 *   console.log('Intel AVX2 (Haswell) available');
 * }
 * if (hasCapability(Capability.NEON)) {
 *   console.log('ARM NEON available');
 * }
 * if (hasCapability(Capability.V128RELAXED)) {
 *   console.log('WASM Relaxed SIMD available');
 * }
 * ```
 */
export const hasCapability = (cap) => {
    const detected = getCapabilities();
    const overlap = detected & cap;
    return overlap !== 0n;
};
|
|
184
|
+
/**
 * Forwards two vectors to the native `sqeuclidean` kernel (squared Euclidean distance, per its name).
 *
 * When `a` is a TensorBase, both operands are unwrapped to TypedArray views and the
 * dtype of `a` is forwarded; the `dtype` argument is not consulted in that case.
 * Otherwise the operands are passed through unchanged, and the dtype string is appended
 * only when `dtype` is provided.
 *
 * NOTE(review): on the TensorBase path `b` is unwrapped the same way as `a` and is
 * presumably expected to be a TensorBase of the same dtype — confirm against callers.
 *
 * @param a - First vector (TensorBase or TypedArray).
 * @param b - Second vector.
 * @param [dtype] - Explicit DType for non-TensorBase inputs.
 * @returns Whatever the native kernel returns.
 */
export function sqeuclidean(a, b, dtype) {
    if (!(a instanceof TensorBase)) {
        if (dtype === undefined) {
            return compiled.sqeuclidean(a, b);
        }
        return compiled.sqeuclidean(a, b, dtypeToString(dtype));
    }
    const lhs = unwrapTensor(a);
    const rhs = unwrapTensor(b);
    return compiled.sqeuclidean(lhs.arr, rhs.arr, dtypeToString(lhs.dtype));
}
|
|
191
|
+
/**
 * Forwards two vectors to the native `euclidean` kernel (Euclidean distance; the module
 * header example shows `euclidean([1,2,3], [4,5,6])` yielding 5.196...).
 *
 * When `a` is a TensorBase, both operands are unwrapped to TypedArray views and the
 * dtype of `a` is forwarded; the `dtype` argument is not consulted in that case.
 * Otherwise the operands are passed through unchanged, and the dtype string is appended
 * only when `dtype` is provided.
 *
 * NOTE(review): on the TensorBase path `b` is unwrapped the same way as `a` and is
 * presumably expected to be a TensorBase of the same dtype — confirm against callers.
 *
 * @param a - First vector (TensorBase or TypedArray).
 * @param b - Second vector.
 * @param [dtype] - Explicit DType for non-TensorBase inputs.
 * @returns Whatever the native kernel returns.
 */
export function euclidean(a, b, dtype) {
    if (!(a instanceof TensorBase)) {
        if (dtype === undefined) {
            return compiled.euclidean(a, b);
        }
        return compiled.euclidean(a, b, dtypeToString(dtype));
    }
    const lhs = unwrapTensor(a);
    const rhs = unwrapTensor(b);
    return compiled.euclidean(lhs.arr, rhs.arr, dtypeToString(lhs.dtype));
}
|
|
198
|
+
/**
 * Forwards two vectors to the native `angular` kernel (angular distance, per its name).
 *
 * When `a` is a TensorBase, both operands are unwrapped to TypedArray views and the
 * dtype of `a` is forwarded; the `dtype` argument is not consulted in that case.
 * Otherwise the operands are passed through unchanged, and the dtype string is appended
 * only when `dtype` is provided.
 *
 * NOTE(review): on the TensorBase path `b` is unwrapped the same way as `a` and is
 * presumably expected to be a TensorBase of the same dtype — confirm against callers.
 *
 * @param a - First vector (TensorBase or TypedArray).
 * @param b - Second vector.
 * @param [dtype] - Explicit DType for non-TensorBase inputs.
 * @returns Whatever the native kernel returns.
 */
export function angular(a, b, dtype) {
    if (!(a instanceof TensorBase)) {
        if (dtype === undefined) {
            return compiled.angular(a, b);
        }
        return compiled.angular(a, b, dtypeToString(dtype));
    }
    const lhs = unwrapTensor(a);
    const rhs = unwrapTensor(b);
    return compiled.angular(lhs.arr, rhs.arr, dtypeToString(lhs.dtype));
}
|
|
205
|
+
/**
 * Forwards two vectors to the native `inner` kernel (inner product, per its name).
 *
 * When `a` is a TensorBase, both operands are unwrapped to TypedArray views and the
 * dtype of `a` is forwarded; the `dtype` argument is not consulted in that case.
 * Otherwise the operands are passed through unchanged, and the dtype string is appended
 * only when `dtype` is provided.
 *
 * NOTE(review): on the TensorBase path `b` is unwrapped the same way as `a` and is
 * presumably expected to be a TensorBase of the same dtype — confirm against callers.
 *
 * @param a - First vector (TensorBase or TypedArray).
 * @param b - Second vector.
 * @param [dtype] - Explicit DType for non-TensorBase inputs.
 * @returns Whatever the native kernel returns.
 */
export function inner(a, b, dtype) {
    if (!(a instanceof TensorBase)) {
        if (dtype === undefined) {
            return compiled.inner(a, b);
        }
        return compiled.inner(a, b, dtypeToString(dtype));
    }
    const lhs = unwrapTensor(a);
    const rhs = unwrapTensor(b);
    return compiled.inner(lhs.arr, rhs.arr, dtypeToString(lhs.dtype));
}
|
|
212
|
+
/**
 * Forwards two vectors to the native `dot` kernel (dot product; the module header
 * example shows `dot([1,2,3], [4,5,6])` yielding 32).
 *
 * When `a` is a TensorBase, both operands are unwrapped to TypedArray views and the
 * dtype of `a` is forwarded; the `dtype` argument is not consulted in that case.
 * Otherwise the operands are passed through unchanged, and the dtype string is appended
 * only when `dtype` is provided.
 *
 * NOTE(review): on the TensorBase path `b` is unwrapped the same way as `a` and is
 * presumably expected to be a TensorBase of the same dtype — confirm against callers.
 *
 * @param a - First vector (TensorBase or TypedArray).
 * @param b - Second vector.
 * @param [dtype] - Explicit DType for non-TensorBase inputs.
 * @returns Whatever the native kernel returns.
 */
export function dot(a, b, dtype) {
    if (!(a instanceof TensorBase)) {
        if (dtype === undefined) {
            return compiled.dot(a, b);
        }
        return compiled.dot(a, b, dtypeToString(dtype));
    }
    const lhs = unwrapTensor(a);
    const rhs = unwrapTensor(b);
    return compiled.dot(lhs.arr, rhs.arr, dtypeToString(lhs.dtype));
}
|
|
219
|
+
/**
 * Bitwise Hamming distance between two bit-packed vectors.
 *
 * Inputs are interpreted as u1 data: every byte carries 8 bits. Numeric arrays can be
 * converted to this layout with toBinary(). When `a` is a TensorBase, both operands are
 * first unwrapped to TypedArray views; no dtype string is passed to the native kernel.
 *
 * @param {Uint8Array | BinaryArray} a - The first bit-packed vector.
 * @param {Uint8Array | BinaryArray} b - The second bit-packed vector.
 * @returns {number} The Hamming distance (number of differing bits) between vectors a and b.
 */
export const hamming = (a, b) => {
    if (!(a instanceof TensorBase)) {
        return compiled.hamming(a, b);
    }
    const lhs = unwrapTensor(a);
    const rhs = unwrapTensor(b);
    return compiled.hamming(lhs.arr, rhs.arr);
};
|
|
236
|
+
/**
 * Bitwise Jaccard distance between two bit-packed vectors.
 *
 * Inputs are interpreted as u1 data: every byte carries 8 bits. Numeric arrays can be
 * converted to this layout with toBinary(). When `a` is a TensorBase, both operands are
 * first unwrapped to TypedArray views; no dtype string is passed to the native kernel.
 *
 * @param {Uint8Array | BinaryArray} a - The first bit-packed vector.
 * @param {Uint8Array | BinaryArray} b - The second bit-packed vector.
 * @returns {number} The Jaccard distance (1 - Jaccard similarity) between vectors a and b.
 */
export const jaccard = (a, b) => {
    if (!(a instanceof TensorBase)) {
        return compiled.jaccard(a, b);
    }
    const lhs = unwrapTensor(a);
    const rhs = unwrapTensor(b);
    return compiled.jaccard(lhs.arr, rhs.arr);
};
|
|
253
|
+
/**
 * Forwards two vectors to the native `kullbackleibler` kernel (Kullback-Leibler divergence, per its name).
 *
 * When `a` is a TensorBase, both operands are unwrapped to TypedArray views and the
 * dtype of `a` is forwarded; the `dtype` argument is not consulted in that case.
 * Otherwise the operands are passed through unchanged, and the dtype string is appended
 * only when `dtype` is provided.
 *
 * NOTE(review): on the TensorBase path `b` is unwrapped the same way as `a` and is
 * presumably expected to be a TensorBase of the same dtype — confirm against callers.
 *
 * @param a - First vector (TensorBase or TypedArray).
 * @param b - Second vector.
 * @param [dtype] - Explicit DType for non-TensorBase inputs.
 * @returns Whatever the native kernel returns.
 */
export function kullbackleibler(a, b, dtype) {
    if (!(a instanceof TensorBase)) {
        if (dtype === undefined) {
            return compiled.kullbackleibler(a, b);
        }
        return compiled.kullbackleibler(a, b, dtypeToString(dtype));
    }
    const lhs = unwrapTensor(a);
    const rhs = unwrapTensor(b);
    return compiled.kullbackleibler(lhs.arr, rhs.arr, dtypeToString(lhs.dtype));
}
|
|
260
|
+
/**
 * Forwards two vectors to the native `jensenshannon` kernel (Jensen-Shannon metric, per its name).
 *
 * When `a` is a TensorBase, both operands are unwrapped to TypedArray views and the
 * dtype of `a` is forwarded; the `dtype` argument is not consulted in that case.
 * Otherwise the operands are passed through unchanged, and the dtype string is appended
 * only when `dtype` is provided.
 *
 * NOTE(review): on the TensorBase path `b` is unwrapped the same way as `a` and is
 * presumably expected to be a TensorBase of the same dtype — confirm against callers.
 *
 * @param a - First vector (TensorBase or TypedArray).
 * @param b - Second vector.
 * @param [dtype] - Explicit DType for non-TensorBase inputs.
 * @returns Whatever the native kernel returns.
 */
export function jensenshannon(a, b, dtype) {
    if (!(a instanceof TensorBase)) {
        if (dtype === undefined) {
            return compiled.jensenshannon(a, b);
        }
        return compiled.jensenshannon(a, b, dtypeToString(dtype));
    }
    const lhs = unwrapTensor(a);
    const rhs = unwrapTensor(b);
    return compiled.jensenshannon(lhs.arr, rhs.arr, dtypeToString(lhs.dtype));
}
|
|
267
|
+
/**
 * Quantizes a numeric vector down to one bit per element and packs the bits into bytes.
 *
 * An element maps to 1 when it is strictly greater than zero and to 0 otherwise
 * (zero, negatives and NaN all yield 0). Element `i` lands in byte `floor(i / 8)`, with
 * the first element of each group of eight in the most significant bit. When the length
 * is not a multiple of 8, the unused low bits of the last byte stay 0.
 * This is the layout expected by hamming() and jaccard().
 *
 * @param {Float32Array | Float64Array | Int8Array} vector - The vector to quantize and pack.
 * @returns {Uint8Array} A bit-packed array where each byte contains 8 binary values.
 *
 * @example
 * ```ts
 * const vec = new Float32Array([1.5, -2.3, 0.0, 3.1, -1.0, 2.0, 0.5, -0.5]);
 * const binary = toBinary(vec);
 * // Result: Uint8Array([0b10010110]) = [0x96]
 * // bits: [1, 0, 0, 1, 0, 1, 1, 0] for elements [+, -, 0, +, -, +, +, -]
 *
 * // Use with Hamming distance
 * const a = toBinary(new Float32Array([1, 2, 3]));
 * const b = toBinary(new Float32Array([1, -2, 3]));
 * const dist = hamming(a, b); // Counts differing bits
 * ```
 */
export const toBinary = (vector) => {
    const elementCount = vector.length;
    const packed = new Uint8Array(Math.ceil(elementCount / 8));
    for (let index = 0; index < elementCount; index += 1) {
        if (!(vector[index] > 0)) {
            continue;
        }
        // 0x80 is the most significant bit; shifting right by (index % 8) walks MSB -> LSB.
        packed[Math.floor(index / 8)] |= 0x80 >> (index % 8);
    }
    return packed;
};
|
|
302
|
+
/**
 * Produce a TypedArray view over a Matrix's storage for the N-API backend.
 *
 * The view spans `rows * cols` elements starting at the matrix's `byteOffset`. F16 and
 * BF16 are exposed as raw 16-bit words; U8 and every dtype without a dedicated view are
 * exposed as bytes.
 *
 * NOTE(review): the element count ignores `rowStride`; confirm this is sufficient for
 * matrices whose rows are not densely packed.
 *
 * @param matrix - Object exposing `buffer`, `byteOffset`, `rows`, `cols` and `dtype`.
 * @returns `{ array, dtype }` where `array` is the view and `dtype` is the matrix's dtype.
 */
function unwrapMatrix(matrix) {
    const { dtype, buffer, byteOffset } = matrix;
    const elementCount = matrix.rows * matrix.cols;
    let ViewType;
    if (dtype === DType.F64) {
        ViewType = Float64Array;
    }
    else if (dtype === DType.F32) {
        ViewType = Float32Array;
    }
    else if (dtype === DType.F16 || dtype === DType.BF16) {
        ViewType = Uint16Array;
    }
    else if (dtype === DType.I8) {
        ViewType = Int8Array;
    }
    else {
        // U8 and any dtype without a dedicated view share the byte-wise fallback.
        ViewType = Uint8Array;
    }
    return { array: new ViewType(buffer, byteOffset, elementCount), dtype };
}
|
|
316
|
+
/**
 * Produce the TypedArray view that the native backend writes results into.
 *
 * The view spans `rows * cols` elements starting at the matrix's `byteOffset`, typed
 * according to the matrix's dtype. F32, I32 and U32 get matching views; F64 and any
 * other dtype are viewed as Float64Array.
 *
 * @param matrix - Object exposing `buffer`, `byteOffset`, `rows`, `cols` and `dtype`.
 * @returns The TypedArray view (not wrapped in an object).
 */
function unwrapResultMatrix(matrix) {
    const { dtype, buffer, byteOffset } = matrix;
    const elementCount = matrix.rows * matrix.cols;
    if (dtype === DType.F32) {
        return new Float32Array(buffer, byteOffset, elementCount);
    }
    if (dtype === DType.I32) {
        return new Int32Array(buffer, byteOffset, elementCount);
    }
    if (dtype === DType.U32) {
        return new Uint32Array(buffer, byteOffset, elementCount);
    }
    // F64 and every unlisted dtype fall back to a 64-bit float view.
    return new Float64Array(buffer, byteOffset, elementCount);
}
|
|
328
|
+
/**
 * Asks the native backend how many bytes a packed buffer needs for the given shape and dtype.
 *
 * @param width - Forwarded to the native call as its first argument.
 * @param depth - Forwarded to the native call as its second argument.
 * @param dtype - DType of the matrix; converted to its string name before the call.
 * @returns Whatever the native `dotsPackedSize` returns.
 */
export function dotsPackedSize(width, depth, dtype) {
    const dtypeName = dtypeToString(dtype);
    return compiled.dotsPackedSize(width, depth, dtypeName);
}
|
|
334
|
+
/**
 * Packs a Matrix for use with the packed GEMM-like operations.
 *
 * The matrix is unwrapped to a TypedArray view and handed to the native `dotsPack`
 * together with its `rows`, `cols`, `rowStride` and dtype name. The native result's
 * `buffer`, `width`, `depth` and `byteLength` are wrapped in a PackedMatrix that keeps
 * the source matrix's dtype.
 *
 * @param matrix - Source Matrix.
 * @returns {PackedMatrix} The packed representation.
 */
export function dotsPack(matrix) {
    const source = unwrapMatrix(matrix);
    const nativeResult = compiled.dotsPack(source.array, matrix.rows, matrix.cols, matrix.rowStride, dtypeToString(source.dtype));
    const { buffer, width, depth, byteLength } = nativeResult;
    return new PackedMatrix(buffer, width, depth, matrix.dtype, byteLength);
}
|
|
342
|
+
/**
 * Shared driver for the `*Packed` entry points: runs one native kernel on a
 * Matrix and a PackedMatrix and returns the Matrix that received the results.
 *
 * @param compiledName - Name of the method to invoke on the native `compiled` binding.
 * @param family - Operation family passed to `outputDtype` to choose the result dtype.
 * @param a - Left-hand matrix; its `cols` must equal `packed.depth`.
 * @param packed - Packed operand; `buffer`, `width` and `depth` are read from it.
 * @param out - Optional destination matrix. When falsy, a new
 *   `a.rows` x `packed.width` Matrix of the family's output dtype is allocated.
 * @returns The destination matrix (`out` when supplied, otherwise the new one).
 * @throws {Error} When `a.cols` differs from `packed.depth`.
 */
function packedOperation(compiledName, family, a, packed, out) {
    const sharedDepth = a.cols;
    if (sharedDepth !== packed.depth) {
        throw new Error(`Matrix cols (${sharedDepth}) must match packed depth (${packed.depth})`);
    }
    // The output dtype is resolved even when the caller supplies `out`.
    const outDtype = outputDtype(family, a.dtype);
    const target = out ? out : new Matrix(a.rows, packed.width, outDtype);
    const input = unwrapMatrix(a);
    const resultView = unwrapResultMatrix(target);
    const dtypeName = dtypeToString(a.dtype);
    // Invoked as a method so the native function still sees `compiled` as `this`.
    compiled[compiledName](input.array, packed.buffer, resultView, a.rows, packed.width, sharedDepth, a.rowStride, target.rowStride, dtypeName);
    return target;
}
|
|
355
|
+
/**
 * Shared driver for the `*Symmetric` entry points: runs one native kernel that
 * relates the rows of a single matrix to each other.
 *
 * @param compiledName - Name of the method to invoke on the native `compiled` binding.
 * @param family - Operation family passed to `outputDtype` to choose the result dtype.
 * @param vectors - Input matrix; `rows`, `cols`, `rowStride` and `dtype` are read from it.
 * @param out - Optional destination matrix. When falsy, a new
 *   `vectors.rows` x `vectors.rows` Matrix of the family's output dtype is allocated.
 * @param rowStart - Row offset forwarded to the native call; defaults to 0.
 * @param rowCount - Row count forwarded to the native call; when null or
 *   undefined it becomes `vectors.rows - rowStart`.
 * @returns The destination matrix (`out` when supplied, otherwise the new one).
 */
function symmetricOperation(compiledName, family, vectors, out, rowStart = 0, rowCount) {
    // The output dtype is resolved even when the caller supplies `out`.
    const outDtype = outputDtype(family, vectors.dtype);
    const rowsToCompute = rowCount ?? vectors.rows - rowStart;
    const target = out || new Matrix(vectors.rows, vectors.rows, outDtype);
    const input = unwrapMatrix(vectors);
    const resultView = unwrapResultMatrix(target);
    const dtypeName = dtypeToString(vectors.dtype);
    // Invoked as a method so the native function still sees `compiled` as `this`.
    compiled[compiledName](input.array, resultView, vectors.rows, vectors.cols, vectors.rowStride, target.rowStride, rowStart, rowsToCompute, dtypeName);
    return target;
}
|
|
366
|
+
/**
 * Run the native `dotsPacked` kernel (family `dots`) on a matrix and a packed operand.
 *
 * @param a - Left-hand matrix.
 * @param packed - Packed operand.
 * @param out - Optional destination matrix.
 * @returns The matrix returned by `packedOperation`.
 */
export function dotsPacked(a, packed, out) {
    const result = packedOperation('dotsPacked', 'dots', a, packed, out);
    return result;
}
|
|
369
|
+
/**
 * Run the native `angularsPacked` kernel (family `angulars`) on a matrix and a packed operand.
 *
 * @param a - Left-hand matrix.
 * @param packed - Packed operand.
 * @param out - Optional destination matrix.
 * @returns The matrix returned by `packedOperation`.
 */
export function angularsPacked(a, packed, out) {
    const result = packedOperation('angularsPacked', 'angulars', a, packed, out);
    return result;
}
|
|
372
|
+
/**
 * Run the native `euclideansPacked` kernel (family `euclideans`) on a matrix and a packed operand.
 *
 * @param a - Left-hand matrix.
 * @param packed - Packed operand.
 * @param out - Optional destination matrix.
 * @returns The matrix returned by `packedOperation`.
 */
export function euclideansPacked(a, packed, out) {
    const result = packedOperation('euclideansPacked', 'euclideans', a, packed, out);
    return result;
}
|
|
375
|
+
/**
 * Run the native `dotsSymmetric` kernel (family `dots`) over the rows of one matrix.
 *
 * @param vectors - Input matrix.
 * @param out - Optional destination matrix.
 * @param options - Optional settings; `rowStart` (0 when null or undefined)
 *   and `rowCount` are forwarded to `symmetricOperation`.
 * @returns The matrix returned by `symmetricOperation`.
 */
export function dotsSymmetric(vectors, out, options) {
    const rowStart = options?.rowStart ?? 0;
    const rowCount = options?.rowCount;
    return symmetricOperation('dotsSymmetric', 'dots', vectors, out, rowStart, rowCount);
}
|
|
378
|
+
/**
 * Run the native `angularsSymmetric` kernel (family `angulars`) over the rows of one matrix.
 *
 * @param vectors - Input matrix.
 * @param out - Optional destination matrix.
 * @param options - Optional settings; `rowStart` (0 when null or undefined)
 *   and `rowCount` are forwarded to `symmetricOperation`.
 * @returns The matrix returned by `symmetricOperation`.
 */
export function angularsSymmetric(vectors, out, options) {
    const rowStart = options?.rowStart ?? 0;
    const rowCount = options?.rowCount;
    return symmetricOperation('angularsSymmetric', 'angulars', vectors, out, rowStart, rowCount);
}
|
|
381
|
+
/**
 * Run the native `euclideansSymmetric` kernel (family `euclideans`) over the rows of one matrix.
 *
 * @param vectors - Input matrix.
 * @param out - Optional destination matrix.
 * @param options - Optional settings; `rowStart` (0 when null or undefined)
 *   and `rowCount` are forwarded to `symmetricOperation`.
 * @returns The matrix returned by `symmetricOperation`.
 */
export function euclideansSymmetric(vectors, out, options) {
    const rowStart = options?.rowStart ?? 0;
    const rowCount = options?.rowCount;
    return symmetricOperation('euclideansSymmetric', 'euclideans', vectors, out, rowStart, rowCount);
}
|
|
384
|
+
/**
 * Default export: a single object carrying the bindings listed below under
 * their own names. Key order is significant for enumeration and is kept
 * exactly as declared; the line grouping is purely visual.
 */
export default {
    dot, inner, sqeuclidean, euclidean, angular,
    hamming, jaccard, kullbackleibler, jensenshannon,
    toBinary,
    Float16Array, BFloat16Array, E4M3Array, E5M2Array, BinaryArray,
    TensorBase, VectorBase, VectorView, Vector,
    MatrixBase, Matrix, PackedMatrix,
    castF16ToF32, castF32ToF16,
    castBF16ToF32, castF32ToBF16,
    castE4M3ToF32, castF32ToE4M3,
    castE5M2ToF32, castF32ToE5M2,
    cast,
    dotsPack, dotsPacked, angularsPacked, euclideansPacked,
    dotsSymmetric, angularsSymmetric, euclideansSymmetric,
    dotsPackedSize,
    outputDtype,
};
|
|
426
|
+
/**
 * Finds the directory where the native build of the numkong module is located.
 *
 * Starting at `dir`, each directory on the way up is accepted as soon as it
 * contains a `build` or a `prebuilds` entry. A directory named `.next` gets
 * one extra probe: if `../node_modules/numkong` exists next to it, the search
 * restarts from there.
 *
 * The walk stops when stepping to the parent no longer changes the resolved
 * location. Unlike a literal `dir === "/"` comparison, this also ends the
 * search on Windows drive roots and for relative start paths, which otherwise
 * recurse until the call stack overflows.
 *
 * @param {string} dir - The directory to start the search from.
 * @returns {string} The first directory found that holds `build` or `prebuilds`.
 * @throws {Error} When the filesystem root is reached without finding one.
 */
function getBuildDir(dir) {
    let current = dir;
    for (;;) {
        if (existsSync(path.join(current, "build")) || existsSync(path.join(current, "prebuilds"))) {
            return current;
        }
        if (path.basename(current) === ".next") {
            // special case for next.js on custom node (not vercel)
            const sideways = path.join(current, "..", "node_modules", "numkong");
            if (existsSync(sideways)) {
                return getBuildDir(sideways);
            }
        }
        const parent = path.join(current, "..");
        // At the root, the parent resolves to the same location as the current directory.
        if (path.resolve(parent) === path.resolve(current)) {
            throw new Error("Could not find native build for numkong");
        }
        current = parent;
    }
}
|
|
445
|
+
/**
 * Determine the directory this module lives in.
 *
 * Uses `__dirname` when reading it succeeds and yields a truthy value.
 * Otherwise (reading it threw, or it was falsy) the result is
 * `getRoot(getFileName())`.
 *
 * @returns The value of `__dirname`, or the result of `getRoot(getFileName())`.
 */
function getDirName() {
    let ambientDir;
    try {
        ambientDir = __dirname;
    }
    catch {
        // Reading `__dirname` threw; deliberately fall through to the fallback.
        ambientDir = undefined;
    }
    return ambientDir ? ambientDir : getRoot(getFileName());
}
|