npm - numkong - Versions diffs - 7.0.0 - Mend

numkong 7.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (294)

package/LICENSE +201 -0
package/README.md +495 -0
package/binding.gyp +540 -0
package/c/dispatch.h +512 -0
package/c/dispatch_bf16.c +389 -0
package/c/dispatch_bf16c.c +52 -0
package/c/dispatch_e2m3.c +263 -0
package/c/dispatch_e3m2.c +243 -0
package/c/dispatch_e4m3.c +276 -0
package/c/dispatch_e5m2.c +272 -0
package/c/dispatch_f16.c +376 -0
package/c/dispatch_f16c.c +58 -0
package/c/dispatch_f32.c +378 -0
package/c/dispatch_f32c.c +99 -0
package/c/dispatch_f64.c +296 -0
package/c/dispatch_f64c.c +98 -0
package/c/dispatch_i16.c +96 -0
package/c/dispatch_i32.c +89 -0
package/c/dispatch_i4.c +150 -0
package/c/dispatch_i64.c +86 -0
package/c/dispatch_i8.c +289 -0
package/c/dispatch_other.c +330 -0
package/c/dispatch_u1.c +148 -0
package/c/dispatch_u16.c +124 -0
package/c/dispatch_u32.c +118 -0
package/c/dispatch_u4.c +150 -0
package/c/dispatch_u64.c +102 -0
package/c/dispatch_u8.c +303 -0
package/c/numkong.c +950 -0
package/include/README.md +573 -0
package/include/module.modulemap +129 -0
package/include/numkong/attention/sapphireamx.h +1361 -0
package/include/numkong/attention/sme.h +2066 -0
package/include/numkong/attention.h +49 -0
package/include/numkong/capabilities.h +748 -0
package/include/numkong/cast/README.md +262 -0
package/include/numkong/cast/haswell.h +975 -0
package/include/numkong/cast/icelake.h +470 -0
package/include/numkong/cast/neon.h +1192 -0
package/include/numkong/cast/rvv.h +1021 -0
package/include/numkong/cast/sapphire.h +262 -0
package/include/numkong/cast/serial.h +2262 -0
package/include/numkong/cast/skylake.h +856 -0
package/include/numkong/cast/v128relaxed.h +180 -0
package/include/numkong/cast.h +230 -0
package/include/numkong/curved/README.md +223 -0
package/include/numkong/curved/genoa.h +182 -0
package/include/numkong/curved/haswell.h +276 -0
package/include/numkong/curved/neon.h +205 -0
package/include/numkong/curved/neonbfdot.h +212 -0
package/include/numkong/curved/neonhalf.h +212 -0
package/include/numkong/curved/rvv.h +305 -0
package/include/numkong/curved/serial.h +207 -0
package/include/numkong/curved/skylake.h +457 -0
package/include/numkong/curved/smef64.h +506 -0
package/include/numkong/curved.h +517 -0
package/include/numkong/curved.hpp +144 -0
package/include/numkong/dot/README.md +425 -0
package/include/numkong/dot/alder.h +563 -0
package/include/numkong/dot/genoa.h +315 -0
package/include/numkong/dot/haswell.h +1688 -0
package/include/numkong/dot/icelake.h +883 -0
package/include/numkong/dot/neon.h +818 -0
package/include/numkong/dot/neonbfdot.h +244 -0
package/include/numkong/dot/neonfhm.h +360 -0
package/include/numkong/dot/neonhalf.h +198 -0
package/include/numkong/dot/neonsdot.h +508 -0
package/include/numkong/dot/rvv.h +714 -0
package/include/numkong/dot/rvvbb.h +72 -0
package/include/numkong/dot/rvvbf16.h +123 -0
package/include/numkong/dot/rvvhalf.h +129 -0
package/include/numkong/dot/sapphire.h +141 -0
package/include/numkong/dot/serial.h +838 -0
package/include/numkong/dot/sierra.h +405 -0
package/include/numkong/dot/skylake.h +1084 -0
package/include/numkong/dot/sve.h +379 -0
package/include/numkong/dot/svebfdot.h +74 -0
package/include/numkong/dot/svehalf.h +123 -0
package/include/numkong/dot/v128relaxed.h +1258 -0
package/include/numkong/dot.h +1070 -0
package/include/numkong/dot.hpp +94 -0
package/include/numkong/dots/README.md +496 -0
package/include/numkong/dots/alder.h +114 -0
package/include/numkong/dots/genoa.h +94 -0
package/include/numkong/dots/haswell.h +295 -0
package/include/numkong/dots/icelake.h +171 -0
package/include/numkong/dots/neon.h +120 -0
package/include/numkong/dots/neonbfdot.h +58 -0
package/include/numkong/dots/neonfhm.h +94 -0
package/include/numkong/dots/neonhalf.h +57 -0
package/include/numkong/dots/neonsdot.h +108 -0
package/include/numkong/dots/rvv.h +2486 -0
package/include/numkong/dots/sapphireamx.h +3973 -0
package/include/numkong/dots/serial.h +2844 -0
package/include/numkong/dots/sierra.h +97 -0
package/include/numkong/dots/skylake.h +196 -0
package/include/numkong/dots/sme.h +5372 -0
package/include/numkong/dots/smebi32.h +461 -0
package/include/numkong/dots/smef64.h +1318 -0
package/include/numkong/dots/smehalf.h +47 -0
package/include/numkong/dots/v128relaxed.h +294 -0
package/include/numkong/dots.h +2804 -0
package/include/numkong/dots.hpp +639 -0
package/include/numkong/each/README.md +469 -0
package/include/numkong/each/haswell.h +1658 -0
package/include/numkong/each/icelake.h +272 -0
package/include/numkong/each/neon.h +1104 -0
package/include/numkong/each/neonbfdot.h +212 -0
package/include/numkong/each/neonhalf.h +410 -0
package/include/numkong/each/rvv.h +1121 -0
package/include/numkong/each/sapphire.h +477 -0
package/include/numkong/each/serial.h +260 -0
package/include/numkong/each/skylake.h +1562 -0
package/include/numkong/each.h +2146 -0
package/include/numkong/each.hpp +434 -0
package/include/numkong/geospatial/README.md +147 -0
package/include/numkong/geospatial/haswell.h +593 -0
package/include/numkong/geospatial/neon.h +571 -0
package/include/numkong/geospatial/rvv.h +701 -0
package/include/numkong/geospatial/serial.h +309 -0
package/include/numkong/geospatial/skylake.h +577 -0
package/include/numkong/geospatial/v128relaxed.h +613 -0
package/include/numkong/geospatial.h +453 -0
package/include/numkong/geospatial.hpp +235 -0
package/include/numkong/matrix.hpp +336 -0
package/include/numkong/maxsim/README.md +187 -0
package/include/numkong/maxsim/alder.h +511 -0
package/include/numkong/maxsim/genoa.h +115 -0
package/include/numkong/maxsim/haswell.h +553 -0
package/include/numkong/maxsim/icelake.h +480 -0
package/include/numkong/maxsim/neonsdot.h +394 -0
package/include/numkong/maxsim/sapphireamx.h +877 -0
package/include/numkong/maxsim/serial.h +490 -0
package/include/numkong/maxsim/sme.h +929 -0
package/include/numkong/maxsim/v128relaxed.h +280 -0
package/include/numkong/maxsim.h +571 -0
package/include/numkong/maxsim.hpp +133 -0
package/include/numkong/mesh/README.md +227 -0
package/include/numkong/mesh/haswell.h +2235 -0
package/include/numkong/mesh/neon.h +1329 -0
package/include/numkong/mesh/neonbfdot.h +842 -0
package/include/numkong/mesh/neonhalf.h +616 -0
package/include/numkong/mesh/rvv.h +916 -0
package/include/numkong/mesh/serial.h +742 -0
package/include/numkong/mesh/skylake.h +1135 -0
package/include/numkong/mesh/v128relaxed.h +1052 -0
package/include/numkong/mesh.h +652 -0
package/include/numkong/mesh.hpp +762 -0
package/include/numkong/numkong.h +78 -0
package/include/numkong/numkong.hpp +57 -0
package/include/numkong/probability/README.md +173 -0
package/include/numkong/probability/haswell.h +267 -0
package/include/numkong/probability/neon.h +225 -0
package/include/numkong/probability/rvv.h +409 -0
package/include/numkong/probability/serial.h +169 -0
package/include/numkong/probability/skylake.h +324 -0
package/include/numkong/probability.h +383 -0
package/include/numkong/probability.hpp +120 -0
package/include/numkong/random.h +50 -0
package/include/numkong/random.hpp +285 -0
package/include/numkong/reduce/README.md +547 -0
package/include/numkong/reduce/alder.h +632 -0
package/include/numkong/reduce/genoa.h +201 -0
package/include/numkong/reduce/haswell.h +3783 -0
package/include/numkong/reduce/icelake.h +549 -0
package/include/numkong/reduce/neon.h +3841 -0
package/include/numkong/reduce/neonbfdot.h +353 -0
package/include/numkong/reduce/neonfhm.h +665 -0
package/include/numkong/reduce/neonhalf.h +157 -0
package/include/numkong/reduce/neonsdot.h +357 -0
package/include/numkong/reduce/rvv.h +3407 -0
package/include/numkong/reduce/serial.h +757 -0
package/include/numkong/reduce/sierra.h +338 -0
package/include/numkong/reduce/skylake.h +3792 -0
package/include/numkong/reduce/v128relaxed.h +2302 -0
package/include/numkong/reduce.h +1597 -0
package/include/numkong/reduce.hpp +633 -0
package/include/numkong/scalar/README.md +89 -0
package/include/numkong/scalar/haswell.h +113 -0
package/include/numkong/scalar/neon.h +122 -0
package/include/numkong/scalar/neonhalf.h +70 -0
package/include/numkong/scalar/rvv.h +211 -0
package/include/numkong/scalar/sapphire.h +63 -0
package/include/numkong/scalar/serial.h +332 -0
package/include/numkong/scalar/v128relaxed.h +56 -0
package/include/numkong/scalar.h +683 -0
package/include/numkong/set/README.md +179 -0
package/include/numkong/set/haswell.h +334 -0
package/include/numkong/set/icelake.h +485 -0
package/include/numkong/set/neon.h +364 -0
package/include/numkong/set/rvv.h +226 -0
package/include/numkong/set/rvvbb.h +117 -0
package/include/numkong/set/serial.h +174 -0
package/include/numkong/set/sve.h +185 -0
package/include/numkong/set/v128relaxed.h +240 -0
package/include/numkong/set.h +457 -0
package/include/numkong/set.hpp +114 -0
package/include/numkong/sets/README.md +149 -0
package/include/numkong/sets/haswell.h +63 -0
package/include/numkong/sets/icelake.h +66 -0
package/include/numkong/sets/neon.h +61 -0
package/include/numkong/sets/serial.h +43 -0
package/include/numkong/sets/smebi32.h +1099 -0
package/include/numkong/sets/v128relaxed.h +58 -0
package/include/numkong/sets.h +339 -0
package/include/numkong/sparse/README.md +156 -0
package/include/numkong/sparse/icelake.h +463 -0
package/include/numkong/sparse/neon.h +288 -0
package/include/numkong/sparse/serial.h +117 -0
package/include/numkong/sparse/sve2.h +507 -0
package/include/numkong/sparse/turin.h +322 -0
package/include/numkong/sparse.h +363 -0
package/include/numkong/sparse.hpp +113 -0
package/include/numkong/spatial/README.md +435 -0
package/include/numkong/spatial/alder.h +607 -0
package/include/numkong/spatial/genoa.h +290 -0
package/include/numkong/spatial/haswell.h +960 -0
package/include/numkong/spatial/icelake.h +586 -0
package/include/numkong/spatial/neon.h +773 -0
package/include/numkong/spatial/neonbfdot.h +165 -0
package/include/numkong/spatial/neonhalf.h +118 -0
package/include/numkong/spatial/neonsdot.h +261 -0
package/include/numkong/spatial/rvv.h +984 -0
package/include/numkong/spatial/rvvbf16.h +123 -0
package/include/numkong/spatial/rvvhalf.h +117 -0
package/include/numkong/spatial/sapphire.h +343 -0
package/include/numkong/spatial/serial.h +346 -0
package/include/numkong/spatial/sierra.h +323 -0
package/include/numkong/spatial/skylake.h +606 -0
package/include/numkong/spatial/sve.h +224 -0
package/include/numkong/spatial/svebfdot.h +122 -0
package/include/numkong/spatial/svehalf.h +109 -0
package/include/numkong/spatial/v128relaxed.h +717 -0
package/include/numkong/spatial.h +1425 -0
package/include/numkong/spatial.hpp +183 -0
package/include/numkong/spatials/README.md +580 -0
package/include/numkong/spatials/alder.h +94 -0
package/include/numkong/spatials/genoa.h +94 -0
package/include/numkong/spatials/haswell.h +219 -0
package/include/numkong/spatials/icelake.h +113 -0
package/include/numkong/spatials/neon.h +109 -0
package/include/numkong/spatials/neonbfdot.h +60 -0
package/include/numkong/spatials/neonfhm.h +92 -0
package/include/numkong/spatials/neonhalf.h +58 -0
package/include/numkong/spatials/neonsdot.h +109 -0
package/include/numkong/spatials/rvv.h +1960 -0
package/include/numkong/spatials/sapphireamx.h +1149 -0
package/include/numkong/spatials/serial.h +226 -0
package/include/numkong/spatials/sierra.h +96 -0
package/include/numkong/spatials/skylake.h +184 -0
package/include/numkong/spatials/sme.h +1901 -0
package/include/numkong/spatials/smef64.h +465 -0
package/include/numkong/spatials/v128relaxed.h +240 -0
package/include/numkong/spatials.h +3021 -0
package/include/numkong/spatials.hpp +508 -0
package/include/numkong/tensor.hpp +1592 -0
package/include/numkong/trigonometry/README.md +184 -0
package/include/numkong/trigonometry/haswell.h +652 -0
package/include/numkong/trigonometry/neon.h +639 -0
package/include/numkong/trigonometry/rvv.h +699 -0
package/include/numkong/trigonometry/serial.h +703 -0
package/include/numkong/trigonometry/skylake.h +721 -0
package/include/numkong/trigonometry/v128relaxed.h +666 -0
package/include/numkong/trigonometry.h +467 -0
package/include/numkong/trigonometry.hpp +166 -0
package/include/numkong/types.h +1384 -0
package/include/numkong/types.hpp +5603 -0
package/include/numkong/vector.hpp +698 -0
package/javascript/README.md +246 -0
package/javascript/dist/cjs/numkong-wasm.d.ts +166 -0
package/javascript/dist/cjs/numkong-wasm.js +617 -0
package/javascript/dist/cjs/numkong.d.ts +343 -0
package/javascript/dist/cjs/numkong.js +523 -0
package/javascript/dist/cjs/package.json +3 -0
package/javascript/dist/cjs/types.d.ts +284 -0
package/javascript/dist/cjs/types.js +653 -0
package/javascript/dist/esm/numkong-wasm.d.ts +166 -0
package/javascript/dist/esm/numkong-wasm.js +595 -0
package/javascript/dist/esm/numkong.d.ts +343 -0
package/javascript/dist/esm/numkong.js +452 -0
package/javascript/dist/esm/package.json +3 -0
package/javascript/dist/esm/types.d.ts +284 -0
package/javascript/dist/esm/types.js +630 -0
package/javascript/dist-package-cjs.json +3 -0
package/javascript/dist-package-esm.json +3 -0
package/javascript/node-gyp-build.d.ts +1 -0
package/javascript/numkong-wasm.ts +756 -0
package/javascript/numkong.c +689 -0
package/javascript/numkong.ts +575 -0
package/javascript/tsconfig-base.json +39 -0
package/javascript/tsconfig-cjs.json +8 -0
package/javascript/tsconfig-esm.json +8 -0
package/javascript/types.ts +674 -0
package/package.json +87 -0

package/javascript/dist/esm/types.js ADDED Viewed

@@ -0,0 +1,630 @@
+/**
+ * @brief Custom TypedArray classes for non-native numeric types.
+ * @file javascript/types.ts
+ * @author Ash Vardanian
+ * @date February 3, 2026
+ *
+ * This file provides TypedArray wrappers for numeric types not natively supported
+ * by JavaScript, using NumKong's SIMD-optimized conversion functions from the C library.
+ */
+// Holder for the scalar conversion functions (castF32ToF16, castF16ToF32, ...) used by the typed-array wrappers in this file.
+// Nothing is imported here: the binding stays undefined until setConversionFunctions() is called (by numkong.ts, after the module is loaded).
+let conversionFunctions;
+// Installs the conversion-function table; this will be called by numkong.ts after loading the module.
+export function setConversionFunctions(fns) {
+    conversionFunctions = fns; // Stored as-is without validation; a later call replaces the previous table.
+}
+/**
+ * @brief Numeric data type enum — integer switch, compiles to jump table.
+ *
+ * Built as a two-way map: DType["F64"] === 0 and DType[0] === "F64", and likewise
+ * for every other member. The numeric values are also used as indices into
+ * DTYPE_STRINGS (see dtypeToString), so the member order here has to stay aligned
+ * with that array.
+ */
+export var DType;
+(function (DType) {
+    DType[DType["F64"] = 0] = "F64";
+    DType[DType["F32"] = 1] = "F32";
+    DType[DType["F16"] = 2] = "F16";
+    DType[DType["BF16"] = 3] = "BF16";
+    DType[DType["E4M3"] = 4] = "E4M3";
+    DType[DType["E5M2"] = 5] = "E5M2";
+    DType[DType["E2M3"] = 6] = "E2M3";
+    DType[DType["E3M2"] = 7] = "E3M2";
+    DType[DType["I8"] = 8] = "I8";
+    DType[DType["U8"] = 9] = "U8";
+    DType[DType["U1"] = 10] = "U1";
+    DType[DType["I32"] = 11] = "I32";
+    DType[DType["U32"] = 12] = "U32";
+})(DType || (DType = {}));
+/**
+ * @brief O(1) array lookup for DType → string conversion (needed at N-API/WASM boundaries).
+ *
+ * Entry i is the lowercase name of the DType member whose numeric value is i
+ * (index 0 is 'f64' for DType.F64, ..., index 12 is 'u32' for DType.U32).
+ */
+export const DTYPE_STRINGS = [
+    'f64', 'f32', 'f16', 'bf16', 'e4m3', 'e5m2', 'e2m3', 'e3m2', 'i8', 'u8', 'u1', 'i32', 'u32',
+];
+/** @brief Convert a DType enum value to its string representation. */
+export function dtypeToString(d) { return DTYPE_STRINGS[d]; }
+/** @brief Infer the DType from a TypedArray instance. */
+function inferDtype(arr) {
+    if (arr instanceof Float64Array)
+        return DType.F64;
+    if (arr instanceof Float32Array)
+        return DType.F32;
+    if (arr instanceof Int32Array)
+        return DType.I32;
+    if (arr instanceof Int8Array)
+        return DType.I8;
+    if (arr instanceof Uint8Array)
+        return DType.U8;
+    if (arr instanceof Uint16Array)
+        return DType.F16;
+    if (arr instanceof Uint32Array)
+        return DType.U32;
+    throw new Error(`Cannot infer dtype from ${arr.constructor.name}`);
+}
+/**
+ * @brief Abstract base class for all tensor types.
+ *
+ * All fields are embedded — zero dynamic allocation. DType is a numeric enum
+ * (integer switch). Mirrors the C++ pattern: buffer + byteOffset + dtype.
+ */
+export class TensorBase {
+    constructor(buffer, byteOffset, dtype) {
+        this.buffer = buffer;
+        this.byteOffset = byteOffset;
+        this.dtype = dtype;
+    }
+    /** @brief Bytes per element for this tensor's dtype (compiles to jump table). */
+    get bytesPerElement() {
+        switch (this.dtype) {
+            case DType.F64: return 8;
+            case DType.F32:
+            case DType.I32:
+            case DType.U32: return 4;
+            case DType.F16:
+            case DType.BF16: return 2;
+            default: return 1;
+        }
+    }
+    /** @brief Total byte length of the tensor data. */
+    get byteLength() { return this.length * this.bytesPerElement; }
+}
+/**
+ * @brief Abstract rank-1 tensor base class.
+ */
+export class VectorBase extends TensorBase {
+    constructor(buffer, byteOffset, length, dtype) {
+        super(buffer, byteOffset, dtype);
+        this.length = length;
+    }
+    get rank() { return 1; }
+}
+/**
+ * @brief Non-owning rank-1 tensor view (like std::span<T>).
+ *
+ * Zero-copy wrapper for existing memory. Ideal for cross-module WASM interop
+ * where data already lives on the WASM heap.
+ */
+export class VectorView extends VectorBase {
+    constructor(buffer, byteOffset, length, dtype) {
+        super(buffer, byteOffset, length, dtype);
+    }
+    /** @brief Create a VectorView from any TypedArray, inferring or accepting dtype. */
+    static from(arr, dtype) {
+        const d = dtype ?? inferDtype(arr);
+        return new VectorView(arr.buffer, arr.byteOffset, arr.length, d);
+    }
+}
+/**
+ * @brief Owning rank-1 tensor (like std::vector<T>).
+ *
+ * Allocates its own ArrayBuffer. Use for storing results or when you need
+ * independent ownership of the data.
+ */
+export class Vector extends VectorBase {
+    constructor(lengthOrBuffer, dtypeOrLength, dtype) {
+        if (typeof lengthOrBuffer === 'number') {
+            const length = lengthOrBuffer;
+            const dt = dtypeOrLength;
+            let bpe;
+            switch (dt) {
+                case DType.F64:
+                    bpe = 8;
+                    break;
+                case DType.F32:
+                case DType.I32:
+                case DType.U32:
+                    bpe = 4;
+                    break;
+                case DType.F16:
+                case DType.BF16:
+                    bpe = 2;
+                    break;
+                default:
+                    bpe = 1;
+                    break;
+            }
+            super(new ArrayBuffer(length * bpe), 0, length, dt);
+        }
+        else {
+            super(lengthOrBuffer, 0, dtypeOrLength, dtype);
+        }
+    }
+    /** @brief Create an owning Vector by copying data from a TypedArray. */
+    static fromTypedArray(arr, dtype) {
+        const d = dtype ?? inferDtype(arr);
+        return new Vector(arr.buffer.slice(arr.byteOffset, arr.byteOffset + arr.byteLength), arr.length, d);
+    }
+    /** @brief Create an owning Vector by copying data from any TensorBase. */
+    static fromView(view) {
+        return new Vector(view.buffer.slice(view.byteOffset, view.byteOffset + view.byteLength), view.length, view.dtype);
+    }
+    /** @brief Return a TypedArray view over this Vector's owned buffer (zero-copy). */
+    toTypedArray() {
+        switch (this.dtype) {
+            case DType.F64: return new Float64Array(this.buffer, 0, this.length);
+            case DType.F32: return new Float32Array(this.buffer, 0, this.length);
+            case DType.I32: return new Int32Array(this.buffer, 0, this.length);
+            case DType.U32: return new Uint32Array(this.buffer, 0, this.length);
+            case DType.F16:
+            case DType.BF16: return new Uint16Array(this.buffer, 0, this.length);
+            case DType.I8: return new Int8Array(this.buffer, 0, this.length);
+            default: return new Uint8Array(this.buffer, 0, this.length);
+        }
+    }
+}
+/**
+ * @brief Abstract rank-2 tensor base class.
+ *
+ * All 4 dimension fields are embedded — no dynamic allocation.
+ * Strides are in bytes to match the C API directly.
+ */
+export class MatrixBase extends TensorBase {
+    constructor(buffer, byteOffset, dtype, rows, cols, rowStride, colStride) {
+        super(buffer, byteOffset, dtype);
+        this.rows = rows;
+        this.cols = cols;
+        this.rowStride = rowStride;
+        this.colStride = colStride;
+    }
+    get length() { return this.rows * this.cols; }
+    get rank() { return 2; }
+}
+/**
+ * @brief Owning rank-2 tensor (row-major, C-contiguous by default).
+ *
+ * Strides are byte strides. Default for C-contiguous layout:
+ * rowStride = cols * bytesPerElement, colStride = bytesPerElement.
+ */
+export class Matrix extends MatrixBase {
+    constructor(rowsOrBuffer, colsOrByteOffset, dtype, rows, cols, rowStride, colStride) {
+        if (typeof rowsOrBuffer === 'number') {
+            const r = rowsOrBuffer;
+            const c = colsOrByteOffset;
+            let bpe;
+            switch (dtype) {
+                case DType.F64:
+                    bpe = 8;
+                    break;
+                case DType.F32:
+                case DType.I32:
+                case DType.U32:
+                    bpe = 4;
+                    break;
+                case DType.F16:
+                case DType.BF16:
+                    bpe = 2;
+                    break;
+                default:
+                    bpe = 1;
+                    break;
+            }
+            super(new ArrayBuffer(r * c * bpe), 0, dtype, r, c, c * bpe, bpe);
+        }
+        else {
+            const r = rows;
+            const c = cols;
+            let bpe;
+            switch (dtype) {
+                case DType.F64:
+                    bpe = 8;
+                    break;
+                case DType.F32:
+                case DType.I32:
+                case DType.U32:
+                    bpe = 4;
+                    break;
+                case DType.F16:
+                case DType.BF16:
+                    bpe = 2;
+                    break;
+                default:
+                    bpe = 1;
+                    break;
+            }
+            super(rowsOrBuffer, colsOrByteOffset, dtype, r, c, rowStride ?? c * bpe, colStride ?? bpe);
+        }
+    }
+    static fromTypedArray(array, rows, cols, dtype) {
+        const d = dtype ?? inferDtype(array);
+        const buf = array.buffer.slice(array.byteOffset, array.byteOffset + array.byteLength);
+        return new Matrix(buf, 0, d, rows, cols);
+    }
+    toTypedArray() {
+        switch (this.dtype) {
+            case DType.F64: return new Float64Array(this.buffer, this.byteOffset, this.rows * this.cols);
+            case DType.F32: return new Float32Array(this.buffer, this.byteOffset, this.rows * this.cols);
+            case DType.I32: return new Int32Array(this.buffer, this.byteOffset, this.rows * this.cols);
+            case DType.U32: return new Uint32Array(this.buffer, this.byteOffset, this.rows * this.cols);
+            case DType.F16:
+            case DType.BF16: return new Uint16Array(this.buffer, this.byteOffset, this.rows * this.cols);
+            case DType.I8: return new Int8Array(this.buffer, this.byteOffset, this.rows * this.cols);
+            default: return new Uint8Array(this.buffer, this.byteOffset, this.rows * this.cols);
+        }
+    }
+    row(index) {
+        return new VectorView(this.buffer, this.byteOffset + index * this.rowStride, this.cols, this.dtype);
+    }
+}
+/**
+ * @brief Opaque packed matrix container.
+ *
+ * Packed layout is not indexable — this is a data container for packed GEMM kernels.
+ * N-API path: buffer is a V8-managed ArrayBuffer, auto-freed by GC.
+ * WASM path: stores a heap pointer, dispose() calls Module._free().
+ *
+ * The constructor only records its arguments (buffer, width, depth, dtype, byteLength)
+ * and starts with `disposed` set to false.
+ *
+ * NOTE(review): in the code visible here dispose() only sets the `disposed` flag and
+ * releases nothing itself; presumably the WASM path performs the actual free elsewhere
+ * (a subclass or wrapper) — confirm before relying on it.
+ */
+export class PackedMatrix {
+    constructor(buffer, width, depth, dtype, byteLength) {
+        this._disposed = false;
+        this.buffer = buffer;
+        this.width = width;
+        this.depth = depth;
+        this.dtype = dtype;
+        this.byteLength = byteLength;
+    }
+    dispose() { this._disposed = true; } // Marks the container as disposed; safe to call repeatedly.
+    get disposed() { return this._disposed; } // True once dispose() has been called.
+}
+/**
+ * @brief Determines the output dtype for a given kernel family and input dtype.
+ * Mirrors nk_kernel_output_dtype from C.
+ */
+export function outputDtype(family, input) {
+    switch (input) {
+        case DType.F64: return DType.F64;
+        case DType.F32: return DType.F64;
+        case DType.F16:
+        case DType.BF16:
+        case DType.E4M3:
+        case DType.E5M2:
+        case DType.E2M3:
+        case DType.E3M2:
+            return DType.F32;
+        case DType.I8: return family === 'dots' ? DType.I32 : DType.F32;
+        case DType.U8: return family === 'dots' ? DType.U32 : DType.F32;
+        default: return DType.F32;
+    }
+}
+/**
+ * @brief IEEE 754 Half Precision Float (f16)
+ *
+ * 16-bit floating point: 1 sign bit, 5 exponent bits, 10 mantissa bits
+ * Range: ~±65504, precision: ~3-4 decimal digits
+ *
+ * Common in GPU inference, model compression, and mixed-precision training.
+ * Supported natively on Apple Silicon, NVIDIA GPUs (fp16), AMD GPUs.
+ */
+export class Float16Array extends Uint16Array {
+    constructor(length, byteOffset, arrayLength) {
+        if (typeof length === 'number') {
+            super(length);
+        }
+        else if (ArrayBuffer.isView(length) || length instanceof ArrayBuffer) {
+            super(length, byteOffset, arrayLength);
+        }
+        else {
+            // Convert from array-like of numbers
+            const src = length;
+            const arr = new Uint16Array(src.length);
+            if (conversionFunctions) {
+                for (let i = 0; i < src.length; i++) {
+                    arr[i] = conversionFunctions.castF32ToF16(src[i]);
+                }
+            }
+            super(arr);
+        }
+    }
+    /**
+     * @brief Converts the entire f16 array to f32 (Float32Array).
+     * @returns Float32Array with decoded values
+     */
+    toFloat32Array() {
+        if (!conversionFunctions) {
+            throw new Error('Conversion functions not initialized');
+        }
+        const result = new Float32Array(this.length);
+        for (let i = 0; i < this.length; i++) {
+            result[i] = conversionFunctions.castF16ToF32(this[i]);
+        }
+        return result;
+    }
+    /**
+     * @brief Gets the f32 value at the specified index.
+     * @param index Array index
+     * @returns Decoded f32 value
+     */
+    getFloat32(index) {
+        if (!conversionFunctions) {
+            throw new Error('Conversion functions not initialized');
+        }
+        return conversionFunctions.castF16ToF32(this[index]);
+    }
+    /**
+     * @brief Sets the value at the specified index from an f32 value.
+     * @param index Array index
+     * @param value f32 value to encode and store
+     */
+    setFloat32(index, value) {
+        if (!conversionFunctions) {
+            throw new Error('Conversion functions not initialized');
+        }
+        this[index] = conversionFunctions.castF32ToF16(value);
+    }
+}
+/**
+ * @brief Brain Float 16 (bf16)
+ *
+ * 16-bit floating point: 1 sign bit, 8 exponent bits, 7 mantissa bits
+ * Range: same as f32 (~±3.4e38), precision: ~2-3 decimal digits
+ *
+ * Designed by Google for TPUs, optimized for ML training (wider range than f16).
+ * Supported on Google TPUs, Intel Sapphire Rapids, AMD Genoa, ARM Neoverse V2.
+ * Truncated f32 (top 16 bits), making conversion very cheap.
+ */
+export class BFloat16Array extends Uint16Array {
+    constructor(length, byteOffset, arrayLength) {
+        if (typeof length === 'number') {
+            super(length);
+        }
+        else if (ArrayBuffer.isView(length) || length instanceof ArrayBuffer) {
+            super(length, byteOffset, arrayLength);
+        }
+        else {
+            const src = length;
+            const arr = new Uint16Array(src.length);
+            if (conversionFunctions) {
+                for (let i = 0; i < src.length; i++) {
+                    arr[i] = conversionFunctions.castF32ToBF16(src[i]);
+                }
+            }
+            super(arr);
+        }
+    }
+    toFloat32Array() {
+        if (!conversionFunctions) {
+            throw new Error('Conversion functions not initialized');
+        }
+        const result = new Float32Array(this.length);
+        for (let i = 0; i < this.length; i++) {
+            result[i] = conversionFunctions.castBF16ToF32(this[i]);
+        }
+        return result;
+    }
+    getFloat32(index) {
+        if (!conversionFunctions) {
+            throw new Error('Conversion functions not initialized');
+        }
+        return conversionFunctions.castBF16ToF32(this[index]);
+    }
+    setFloat32(index, value) {
+        if (!conversionFunctions) {
+            throw new Error('Conversion functions not initialized');
+        }
+        this[index] = conversionFunctions.castF32ToBF16(value);
+    }
+}
+/**
+ * @brief FP8 E4M3 (4-bit exponent, 3-bit mantissa)
+ *
+ * 8-bit floating point: 1 sign bit, 4 exponent bits, 3 mantissa bits
+ * Range: ~±448, precision: ~1 decimal digit
+ *
+ * Optimized for forward pass inference with higher precision than E5M2.
+ * Supported on NVIDIA Hopper H100 GPUs, AMD Instinct MI300.
+ */
+export class E4M3Array extends Uint8Array {
+    constructor(length, byteOffset, arrayLength) {
+        if (typeof length === 'number') {
+            super(length);
+        }
+        else if (ArrayBuffer.isView(length) || length instanceof ArrayBuffer) {
+            super(length, byteOffset, arrayLength);
+        }
+        else {
+            const src = length;
+            const arr = new Uint8Array(src.length);
+            if (conversionFunctions) {
+                for (let i = 0; i < src.length; i++) {
+                    arr[i] = conversionFunctions.castF32ToE4M3(src[i]);
+                }
+            }
+            super(arr);
+        }
+    }
+    toFloat32Array() {
+        if (!conversionFunctions) {
+            throw new Error('Conversion functions not initialized');
+        }
+        const result = new Float32Array(this.length);
+        for (let i = 0; i < this.length; i++) {
+            result[i] = conversionFunctions.castE4M3ToF32(this[i]);
+        }
+        return result;
+    }
+    getFloat32(index) {
+        if (!conversionFunctions) {
+            throw new Error('Conversion functions not initialized');
+        }
+        return conversionFunctions.castE4M3ToF32(this[index]);
+    }
+    setFloat32(index, value) {
+        if (!conversionFunctions) {
+            throw new Error('Conversion functions not initialized');
+        }
+        this[index] = conversionFunctions.castF32ToE4M3(value);
+    }
+}
+/**
+ * @brief FP8 E5M2 array (1 sign bit, 5 exponent bits, 2 mantissa bits)
+ *
+ * Every element is a single E5M2-encoded byte stored in a Uint8Array.
+ * Range: ~±57344, precision: <1 decimal digit
+ *
+ * Optimized for backward pass training with wider range than E4M3.
+ * Supported on NVIDIA Hopper H100 GPUs, AMD Instinct MI300.
+ */
+export class E5M2Array extends Uint8Array {
+    /**
+     * @brief Builds the array from a length, raw bytes, or a list of numbers.
+     *
+     * - A number allocates that many zeroed elements.
+     * - An ArrayBuffer or any buffer view is forwarded to Uint8Array together
+     *   with `byteOffset` / `arrayLength`; no E5M2 encoding is applied.
+     * - Anything else is read as an array-like of numbers and each entry is
+     *   encoded with `conversionFunctions.castF32ToE5M2`. When
+     *   `conversionFunctions` is not set, the elements are left at zero.
+     *
+     * @param length Element count, buffer / view, or array-like of numbers
+     * @param byteOffset Forwarded to Uint8Array for buffer / view input
+     * @param arrayLength Forwarded to Uint8Array for buffer / view input
+     */
+    constructor(length, byteOffset, arrayLength) {
+        if (typeof length === 'number') {
+            super(length);
+            return;
+        }
+        if (length instanceof ArrayBuffer || ArrayBuffer.isView(length)) {
+            super(length, byteOffset, arrayLength);
+            return;
+        }
+        // Array-like of numbers: encode into a scratch buffer first, because
+        // `this` cannot be touched before super() has run.
+        const encoded = new Uint8Array(length.length);
+        if (conversionFunctions) {
+            for (let pos = 0; pos < length.length; pos += 1) {
+                encoded[pos] = conversionFunctions.castF32ToE5M2(length[pos]);
+            }
+        }
+        super(encoded);
+    }
+    /**
+     * @brief Decodes every element into a new Float32Array of the same length.
+     * @throws Error when the conversion functions are not initialized
+     */
+    toFloat32Array() {
+        if (!conversionFunctions) {
+            throw new Error('Conversion functions not initialized');
+        }
+        const decoded = new Float32Array(this.length);
+        this.forEach((byte, pos) => {
+            decoded[pos] = conversionFunctions.castE5M2ToF32(byte);
+        });
+        return decoded;
+    }
+    /**
+     * @brief Decodes the element stored at `index`.
+     * @param index Element index
+     * @throws Error when the conversion functions are not initialized
+     */
+    getFloat32(index) {
+        if (conversionFunctions) {
+            return conversionFunctions.castE5M2ToF32(this[index]);
+        }
+        throw new Error('Conversion functions not initialized');
+    }
+    /**
+     * @brief Encodes `value` and stores it at `index`.
+     * @param index Element index
+     * @param value Number to encode
+     * @throws Error when the conversion functions are not initialized
+     */
+    setFloat32(index, value) {
+        if (!conversionFunctions) {
+            throw new Error('Conversion functions not initialized');
+        }
+        const byte = conversionFunctions.castF32ToE5M2(value);
+        this[index] = byte;
+    }
+}
+/**
+ * @brief Binary Array (u1) - Bit-packed binary vectors
+ *
+ * 1-bit per element, packed into bytes (8 bits per byte, least-significant
+ * bit first within each byte).
+ * Used for binary embeddings, hashing, and Hamming/Jaccard distances.
+ *
+ * Common in semantic search with binary quantization (Cohere, Voyage).
+ */
+export class BinaryArray extends Uint8Array {
+    /**
+     * @brief Constructor used by inherited methods that create derived arrays.
+     *
+     * The BinaryArray constructor takes a bit count, while `slice`, `subarray`,
+     * `map` and `filter` call the species constructor with a byte count or a
+     * buffer. Building those results as plain Uint8Array keeps them working
+     * instead of throwing or coming back empty.
+     */
+    static get [Symbol.species]() {
+        return Uint8Array;
+    }
+    /**
+     * @brief Allocates a zeroed bit vector.
+     * @param bitLength Number of logical bits; storage is ceil(bitLength / 8) bytes
+     */
+    constructor(bitLength) {
+        const byteLength = Math.ceil(bitLength / 8);
+        super(byteLength);
+        this._bitLength = bitLength;
+    }
+    /**
+     * @brief Gets the bit value at the specified index.
+     * @param index Bit index (0 to bitLength-1)
+     * @returns 0 or 1
+     * @throws RangeError when index is outside 0..bitLength-1 or not a number
+     */
+    getBit(index) {
+        // Positive-form check so NaN / undefined are rejected as well.
+        if (!(index >= 0 && index < this._bitLength)) {
+            throw new RangeError('Index out of bounds');
+        }
+        const byteIndex = index >>> 3; // index / 8
+        const bitIndex = index & 7; // index % 8
+        return (this[byteIndex] >>> bitIndex) & 1;
+    }
+    /**
+     * @brief Sets the bit value at the specified index.
+     * @param index Bit index (0 to bitLength-1)
+     * @param value Truthy sets the bit, falsy clears it
+     * @throws RangeError when index is outside 0..bitLength-1 or not a number
+     */
+    setBit(index, value) {
+        // Positive-form check so NaN / undefined are rejected as well.
+        if (!(index >= 0 && index < this._bitLength)) {
+            throw new RangeError('Index out of bounds');
+        }
+        const byteIndex = index >>> 3;
+        const mask = 1 << (index & 7);
+        if (value) {
+            this[byteIndex] |= mask;
+        }
+        else {
+            this[byteIndex] &= ~mask;
+        }
+    }
+    /**
+     * @brief Returns the logical bit length of the array.
+     */
+    get bitLength() {
+        return this._bitLength;
+    }
+    /**
+     * @brief Creates a BinaryArray from a Float32Array (positive values = 1, else 0).
+     * @param vector Source floating-point vector
+     * @returns Binary array with quantized values
+     */
+    static fromFloat32Array(vector) {
+        const binary = new BinaryArray(vector.length);
+        for (let i = 0; i < vector.length; i++) {
+            if (vector[i] > 0) {
+                binary.setBit(i, 1);
+            }
+        }
+        return binary;
+    }
+    /**
+     * @brief Creates a BinaryArray from a Float64Array (positive values = 1, else 0).
+     * @param vector Source floating-point vector
+     * @returns Binary array with quantized values
+     */
+    static fromFloat64Array(vector) {
+        const binary = new BinaryArray(vector.length);
+        for (let i = 0; i < vector.length; i++) {
+            if (vector[i] > 0) {
+                binary.setBit(i, 1);
+            }
+        }
+        return binary;
+    }
+}
+/**
+ * @brief Type guard: reports whether `obj` is an instance of the Float16Array
+ *        binding visible in this module.
+ * @param obj Value to test
+ * @returns true when `obj instanceof Float16Array`, otherwise false
+ */
+export function isFloat16Array(obj) {
+    const matches = obj instanceof Float16Array;
+    return matches;
+}
+/**
+ * @brief Type guard: reports whether `obj` is a BFloat16Array instance.
+ * @param obj Value to test
+ * @returns true when `obj instanceof BFloat16Array`, otherwise false
+ */
+export function isBFloat16Array(obj) {
+    const matches = obj instanceof BFloat16Array;
+    return matches;
+}
+/**
+ * @brief Type guard: reports whether `obj` is an E4M3Array instance.
+ * @param obj Value to test
+ * @returns true when `obj instanceof E4M3Array`, otherwise false
+ */
+export function isE4M3Array(obj) {
+    const matches = obj instanceof E4M3Array;
+    return matches;
+}
+/**
+ * @brief Type guard: reports whether `obj` is an E5M2Array instance.
+ * @param obj Value to test
+ * @returns true when `obj instanceof E5M2Array`, otherwise false
+ */
+export function isE5M2Array(obj) {
+    const matches = obj instanceof E5M2Array;
+    return matches;
+}
+/**
+ * @brief Type guard: reports whether `obj` is a BinaryArray instance.
+ * @param obj Value to test
+ * @returns true when `obj instanceof BinaryArray`, otherwise false
+ */
+export function isBinaryArray(obj) {
+    const matches = obj instanceof BinaryArray;
+    return matches;
+}

package/javascript/dist-package-cjs.json ADDED Viewed

@@ -0,0 +1,3 @@
+{
+    "type": "commonjs"
+}

package/javascript/dist-package-esm.json ADDED Viewed

@@ -0,0 +1,3 @@
+{
+    "type": "module"
+}

package/javascript/node-gyp-build.d.ts ADDED Viewed

	@@ -0,0 +1 @@
1	+ declare module "node-gyp-build";