numkong 7.0.0 → 7.4.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +197 -124
- package/binding.gyp +34 -484
- package/c/dispatch_bf16.c +59 -1
- package/c/dispatch_e2m3.c +41 -8
- package/c/dispatch_e3m2.c +49 -8
- package/c/dispatch_e4m3.c +51 -9
- package/c/dispatch_e5m2.c +45 -1
- package/c/dispatch_f16.c +79 -26
- package/c/dispatch_f16c.c +5 -5
- package/c/dispatch_f32.c +56 -0
- package/c/dispatch_f64.c +52 -0
- package/c/dispatch_i4.c +3 -0
- package/c/dispatch_i8.c +62 -3
- package/c/dispatch_other.c +18 -0
- package/c/dispatch_u1.c +54 -9
- package/c/dispatch_u4.c +3 -0
- package/c/dispatch_u8.c +64 -3
- package/c/numkong.c +3 -0
- package/include/README.md +79 -9
- package/include/numkong/attention/sapphireamx.h +278 -276
- package/include/numkong/attention/sme.h +983 -977
- package/include/numkong/attention.h +1 -1
- package/include/numkong/capabilities.h +289 -94
- package/include/numkong/cast/README.md +40 -40
- package/include/numkong/cast/diamond.h +64 -0
- package/include/numkong/cast/haswell.h +42 -194
- package/include/numkong/cast/icelake.h +42 -37
- package/include/numkong/cast/loongsonasx.h +252 -0
- package/include/numkong/cast/neon.h +216 -249
- package/include/numkong/cast/powervsx.h +449 -0
- package/include/numkong/cast/rvv.h +223 -274
- package/include/numkong/cast/sapphire.h +18 -18
- package/include/numkong/cast/serial.h +1018 -944
- package/include/numkong/cast/skylake.h +82 -23
- package/include/numkong/cast/v128relaxed.h +462 -105
- package/include/numkong/cast.h +24 -0
- package/include/numkong/cast.hpp +44 -0
- package/include/numkong/curved/README.md +17 -17
- package/include/numkong/curved/neon.h +131 -7
- package/include/numkong/curved/neonbfdot.h +6 -7
- package/include/numkong/curved/rvv.h +26 -26
- package/include/numkong/curved/smef64.h +186 -182
- package/include/numkong/curved.h +14 -18
- package/include/numkong/dot/README.md +154 -137
- package/include/numkong/dot/alder.h +43 -43
- package/include/numkong/dot/diamond.h +158 -0
- package/include/numkong/dot/genoa.h +4 -30
- package/include/numkong/dot/haswell.h +215 -180
- package/include/numkong/dot/icelake.h +190 -76
- package/include/numkong/dot/loongsonasx.h +671 -0
- package/include/numkong/dot/neon.h +124 -73
- package/include/numkong/dot/neonbfdot.h +11 -12
- package/include/numkong/dot/neonfhm.h +44 -46
- package/include/numkong/dot/neonfp8.h +323 -0
- package/include/numkong/dot/neonsdot.h +190 -76
- package/include/numkong/dot/powervsx.h +752 -0
- package/include/numkong/dot/rvv.h +92 -84
- package/include/numkong/dot/rvvbf16.h +12 -12
- package/include/numkong/dot/rvvhalf.h +12 -12
- package/include/numkong/dot/sapphire.h +4 -4
- package/include/numkong/dot/serial.h +66 -30
- package/include/numkong/dot/sierra.h +31 -31
- package/include/numkong/dot/skylake.h +142 -110
- package/include/numkong/dot/sve.h +217 -177
- package/include/numkong/dot/svebfdot.h +10 -10
- package/include/numkong/dot/svehalf.h +85 -41
- package/include/numkong/dot/svesdot.h +89 -0
- package/include/numkong/dot/v128relaxed.h +124 -89
- package/include/numkong/dot.h +114 -48
- package/include/numkong/dots/README.md +203 -203
- package/include/numkong/dots/alder.h +12 -9
- package/include/numkong/dots/diamond.h +86 -0
- package/include/numkong/dots/genoa.h +10 -4
- package/include/numkong/dots/haswell.h +63 -48
- package/include/numkong/dots/icelake.h +27 -18
- package/include/numkong/dots/loongsonasx.h +176 -0
- package/include/numkong/dots/neon.h +14 -11
- package/include/numkong/dots/neonbfdot.h +4 -3
- package/include/numkong/dots/neonfhm.h +11 -9
- package/include/numkong/dots/neonfp8.h +99 -0
- package/include/numkong/dots/neonsdot.h +48 -12
- package/include/numkong/dots/powervsx.h +194 -0
- package/include/numkong/dots/rvv.h +451 -344
- package/include/numkong/dots/sapphireamx.h +1028 -984
- package/include/numkong/dots/serial.h +213 -197
- package/include/numkong/dots/sierra.h +10 -7
- package/include/numkong/dots/skylake.h +47 -36
- package/include/numkong/dots/sme.h +2001 -2364
- package/include/numkong/dots/smebi32.h +175 -162
- package/include/numkong/dots/smef64.h +328 -323
- package/include/numkong/dots/v128relaxed.h +64 -41
- package/include/numkong/dots.h +573 -293
- package/include/numkong/dots.hpp +45 -43
- package/include/numkong/each/README.md +133 -137
- package/include/numkong/each/haswell.h +6 -6
- package/include/numkong/each/icelake.h +7 -7
- package/include/numkong/each/neon.h +76 -42
- package/include/numkong/each/neonbfdot.h +11 -12
- package/include/numkong/each/neonhalf.h +24 -116
- package/include/numkong/each/rvv.h +28 -28
- package/include/numkong/each/sapphire.h +27 -161
- package/include/numkong/each/serial.h +6 -6
- package/include/numkong/each/skylake.h +7 -7
- package/include/numkong/each/v128relaxed.h +562 -0
- package/include/numkong/each.h +148 -62
- package/include/numkong/each.hpp +2 -2
- package/include/numkong/geospatial/README.md +18 -18
- package/include/numkong/geospatial/haswell.h +365 -325
- package/include/numkong/geospatial/neon.h +350 -306
- package/include/numkong/geospatial/rvv.h +4 -4
- package/include/numkong/geospatial/skylake.h +376 -340
- package/include/numkong/geospatial/v128relaxed.h +366 -327
- package/include/numkong/geospatial.h +17 -17
- package/include/numkong/matrix.hpp +4 -4
- package/include/numkong/maxsim/README.md +14 -14
- package/include/numkong/maxsim/alder.h +6 -6
- package/include/numkong/maxsim/genoa.h +4 -4
- package/include/numkong/maxsim/haswell.h +6 -6
- package/include/numkong/maxsim/icelake.h +18 -18
- package/include/numkong/maxsim/neonsdot.h +21 -21
- package/include/numkong/maxsim/sapphireamx.h +14 -14
- package/include/numkong/maxsim/serial.h +6 -6
- package/include/numkong/maxsim/sme.h +221 -196
- package/include/numkong/maxsim/v128relaxed.h +6 -6
- package/include/numkong/mesh/README.md +62 -56
- package/include/numkong/mesh/haswell.h +339 -464
- package/include/numkong/mesh/neon.h +1100 -519
- package/include/numkong/mesh/neonbfdot.h +36 -68
- package/include/numkong/mesh/rvv.h +530 -435
- package/include/numkong/mesh/serial.h +75 -91
- package/include/numkong/mesh/skylake.h +1627 -302
- package/include/numkong/mesh/v128relaxed.h +443 -330
- package/include/numkong/mesh.h +63 -49
- package/include/numkong/mesh.hpp +4 -4
- package/include/numkong/numkong.h +3 -3
- package/include/numkong/numkong.hpp +1 -0
- package/include/numkong/probability/README.md +23 -19
- package/include/numkong/probability/neon.h +82 -52
- package/include/numkong/probability/rvv.h +28 -23
- package/include/numkong/probability/serial.h +51 -39
- package/include/numkong/probability.h +20 -23
- package/include/numkong/random.h +1 -1
- package/include/numkong/reduce/README.md +143 -138
- package/include/numkong/reduce/alder.h +81 -77
- package/include/numkong/reduce/haswell.h +222 -220
- package/include/numkong/reduce/neon.h +629 -519
- package/include/numkong/reduce/neonbfdot.h +7 -218
- package/include/numkong/reduce/neonfhm.h +9 -381
- package/include/numkong/reduce/neonsdot.h +9 -9
- package/include/numkong/reduce/rvv.h +928 -802
- package/include/numkong/reduce/serial.h +23 -27
- package/include/numkong/reduce/sierra.h +20 -20
- package/include/numkong/reduce/skylake.h +326 -324
- package/include/numkong/reduce/v128relaxed.h +52 -52
- package/include/numkong/reduce.h +4 -23
- package/include/numkong/reduce.hpp +156 -11
- package/include/numkong/scalar/README.md +6 -6
- package/include/numkong/scalar/haswell.h +26 -17
- package/include/numkong/scalar/loongsonasx.h +74 -0
- package/include/numkong/scalar/neon.h +9 -9
- package/include/numkong/scalar/powervsx.h +96 -0
- package/include/numkong/scalar/rvv.h +2 -2
- package/include/numkong/scalar/sapphire.h +21 -10
- package/include/numkong/scalar/serial.h +21 -21
- package/include/numkong/scalar.h +13 -0
- package/include/numkong/set/README.md +28 -28
- package/include/numkong/set/haswell.h +12 -12
- package/include/numkong/set/icelake.h +14 -14
- package/include/numkong/set/loongsonasx.h +181 -0
- package/include/numkong/set/neon.h +17 -18
- package/include/numkong/set/powervsx.h +326 -0
- package/include/numkong/set/rvv.h +4 -4
- package/include/numkong/set/serial.h +6 -6
- package/include/numkong/set/sve.h +60 -59
- package/include/numkong/set/v128relaxed.h +6 -6
- package/include/numkong/set.h +21 -7
- package/include/numkong/sets/README.md +26 -26
- package/include/numkong/sets/loongsonasx.h +52 -0
- package/include/numkong/sets/powervsx.h +65 -0
- package/include/numkong/sets/smebi32.h +395 -364
- package/include/numkong/sets.h +83 -40
- package/include/numkong/sparse/README.md +4 -4
- package/include/numkong/sparse/icelake.h +101 -101
- package/include/numkong/sparse/serial.h +1 -1
- package/include/numkong/sparse/sve2.h +137 -141
- package/include/numkong/sparse/turin.h +12 -12
- package/include/numkong/sparse.h +10 -10
- package/include/numkong/spatial/README.md +230 -226
- package/include/numkong/spatial/alder.h +113 -116
- package/include/numkong/spatial/diamond.h +240 -0
- package/include/numkong/spatial/genoa.h +0 -68
- package/include/numkong/spatial/haswell.h +74 -55
- package/include/numkong/spatial/icelake.h +539 -58
- package/include/numkong/spatial/loongsonasx.h +483 -0
- package/include/numkong/spatial/neon.h +125 -52
- package/include/numkong/spatial/neonbfdot.h +8 -9
- package/include/numkong/spatial/neonfp8.h +258 -0
- package/include/numkong/spatial/neonsdot.h +180 -12
- package/include/numkong/spatial/powervsx.h +738 -0
- package/include/numkong/spatial/rvv.h +146 -139
- package/include/numkong/spatial/rvvbf16.h +17 -12
- package/include/numkong/spatial/rvvhalf.h +13 -10
- package/include/numkong/spatial/serial.h +13 -12
- package/include/numkong/spatial/sierra.h +232 -39
- package/include/numkong/spatial/skylake.h +73 -74
- package/include/numkong/spatial/sve.h +93 -72
- package/include/numkong/spatial/svebfdot.h +29 -29
- package/include/numkong/spatial/svehalf.h +52 -26
- package/include/numkong/spatial/svesdot.h +142 -0
- package/include/numkong/spatial/v128relaxed.h +293 -41
- package/include/numkong/spatial.h +338 -82
- package/include/numkong/spatials/README.md +194 -194
- package/include/numkong/spatials/diamond.h +82 -0
- package/include/numkong/spatials/haswell.h +2 -2
- package/include/numkong/spatials/loongsonasx.h +153 -0
- package/include/numkong/spatials/neonfp8.h +111 -0
- package/include/numkong/spatials/neonsdot.h +34 -0
- package/include/numkong/spatials/powervsx.h +153 -0
- package/include/numkong/spatials/rvv.h +259 -243
- package/include/numkong/spatials/sapphireamx.h +173 -173
- package/include/numkong/spatials/serial.h +2 -2
- package/include/numkong/spatials/skylake.h +2 -2
- package/include/numkong/spatials/sme.h +590 -605
- package/include/numkong/spatials/smef64.h +139 -130
- package/include/numkong/spatials/v128relaxed.h +2 -2
- package/include/numkong/spatials.h +820 -500
- package/include/numkong/spatials.hpp +49 -48
- package/include/numkong/tensor.hpp +406 -17
- package/include/numkong/trigonometry/README.md +19 -19
- package/include/numkong/trigonometry/haswell.h +402 -401
- package/include/numkong/trigonometry/neon.h +386 -387
- package/include/numkong/trigonometry/rvv.h +52 -51
- package/include/numkong/trigonometry/serial.h +13 -13
- package/include/numkong/trigonometry/skylake.h +373 -369
- package/include/numkong/trigonometry/v128relaxed.h +375 -374
- package/include/numkong/trigonometry.h +13 -13
- package/include/numkong/trigonometry.hpp +2 -2
- package/include/numkong/types.h +287 -49
- package/include/numkong/types.hpp +436 -12
- package/include/numkong/vector.hpp +82 -14
- package/javascript/dist/cjs/numkong-wasm.js +6 -12
- package/javascript/dist/cjs/numkong.d.ts +7 -1
- package/javascript/dist/cjs/numkong.js +37 -11
- package/javascript/dist/cjs/types.d.ts +9 -0
- package/javascript/dist/cjs/types.js +96 -0
- package/javascript/dist/esm/numkong-browser.d.ts +14 -0
- package/javascript/dist/esm/numkong-browser.js +23 -0
- package/javascript/dist/esm/numkong-wasm.js +6 -12
- package/javascript/dist/esm/numkong.d.ts +7 -1
- package/javascript/dist/esm/numkong.js +37 -11
- package/javascript/dist/esm/types.d.ts +9 -0
- package/javascript/dist/esm/types.js +96 -0
- package/javascript/node-gyp-build.d.ts +4 -1
- package/javascript/numkong-browser.ts +40 -0
- package/javascript/numkong-wasm.ts +7 -13
- package/javascript/numkong.c +5 -26
- package/javascript/numkong.ts +36 -11
- package/javascript/tsconfig-base.json +1 -0
- package/javascript/tsconfig-cjs.json +6 -1
- package/javascript/types.ts +110 -0
- package/numkong.gypi +101 -0
- package/package.json +34 -13
- package/probes/arm_neon.c +8 -0
- package/probes/arm_neon_bfdot.c +9 -0
- package/probes/arm_neon_fhm.c +9 -0
- package/probes/arm_neon_half.c +8 -0
- package/probes/arm_neon_sdot.c +9 -0
- package/probes/arm_neonfp8.c +9 -0
- package/probes/arm_sme.c +16 -0
- package/probes/arm_sme2.c +16 -0
- package/probes/arm_sme2p1.c +16 -0
- package/probes/arm_sme_bf16.c +16 -0
- package/probes/arm_sme_bi32.c +16 -0
- package/probes/arm_sme_f64.c +16 -0
- package/probes/arm_sme_fa64.c +14 -0
- package/probes/arm_sme_half.c +16 -0
- package/probes/arm_sme_lut2.c +15 -0
- package/probes/arm_sve.c +18 -0
- package/probes/arm_sve2.c +20 -0
- package/probes/arm_sve2p1.c +18 -0
- package/probes/arm_sve_bfdot.c +20 -0
- package/probes/arm_sve_half.c +18 -0
- package/probes/arm_sve_sdot.c +21 -0
- package/probes/loongarch_lasx.c +12 -0
- package/probes/power_vsx.c +12 -0
- package/probes/probe.js +127 -0
- package/probes/riscv_rvv.c +14 -0
- package/probes/riscv_rvv_bb.c +15 -0
- package/probes/riscv_rvv_bf16.c +17 -0
- package/probes/riscv_rvv_half.c +14 -0
- package/probes/wasm_v128relaxed.c +11 -0
- package/probes/x86_alder.c +17 -0
- package/probes/x86_diamond.c +17 -0
- package/probes/x86_genoa.c +17 -0
- package/probes/x86_graniteamx.c +19 -0
- package/probes/x86_haswell.c +11 -0
- package/probes/x86_icelake.c +17 -0
- package/probes/x86_sapphire.c +16 -0
- package/probes/x86_sapphireamx.c +18 -0
- package/probes/x86_sierra.c +17 -0
- package/probes/x86_skylake.c +15 -0
- package/probes/x86_turin.c +17 -0
- package/wasm/numkong-emscripten.js +2 -0
- package/wasm/numkong.d.ts +14 -0
- package/wasm/numkong.js +1124 -0
- package/wasm/numkong.wasm +0 -0
- package/include/numkong/curved/neonhalf.h +0 -212
- package/include/numkong/dot/neonhalf.h +0 -198
- package/include/numkong/dots/neonhalf.h +0 -57
- package/include/numkong/mesh/neonhalf.h +0 -616
- package/include/numkong/reduce/neonhalf.h +0 -157
- package/include/numkong/spatial/neonhalf.h +0 -118
- package/include/numkong/spatial/sapphire.h +0 -343
- package/include/numkong/spatials/neonhalf.h +0 -58
- package/javascript/README.md +0 -246
package/javascript/types.ts
CHANGED
|
@@ -126,6 +126,14 @@ export class VectorView extends VectorBase {
|
|
|
126
126
|
super(buffer, byteOffset, length, dtype);
|
|
127
127
|
}
|
|
128
128
|
|
|
129
|
+
/** One-line summary: the view's `length` followed by `dtypeToString(this.dtype)`. */
toString(): string {
    const count = this.length;
    const dtypeName = dtypeToString(this.dtype);
    return `VectorView(${count}, ${dtypeName})`;
}

/** Formatter registered under the `nodejs.util.inspect.custom` symbol; yields the `toString()` text. */
[Symbol.for('nodejs.util.inspect.custom')](): string {
    const summary = this.toString();
    return summary;
}
|
|
136
|
+
|
|
129
137
|
/** @brief Create a VectorView from any TypedArray, inferring or accepting dtype. */
|
|
130
138
|
static from(arr: TypedArray, dtype?: DType): VectorView {
|
|
131
139
|
const d = dtype ?? inferDtype(arr);
|
|
@@ -159,6 +167,14 @@ export class Vector extends VectorBase {
|
|
|
159
167
|
}
|
|
160
168
|
}
|
|
161
169
|
|
|
170
|
+
/** One-line summary: the vector's `length` followed by `dtypeToString(this.dtype)`. */
toString(): string {
    const count = this.length;
    const dtypeName = dtypeToString(this.dtype);
    return `Vector(${count}, ${dtypeName})`;
}

/** Formatter registered under the `nodejs.util.inspect.custom` symbol; yields the `toString()` text. */
[Symbol.for('nodejs.util.inspect.custom')](): string {
    const summary = this.toString();
    return summary;
}
|
|
177
|
+
|
|
162
178
|
/** @brief Create an owning Vector by copying data from a TypedArray. */
|
|
163
179
|
static fromTypedArray(arr: TypedArray, dtype?: DType): Vector {
|
|
164
180
|
const d = dtype ?? inferDtype(arr);
|
|
@@ -254,6 +270,14 @@ export class Matrix extends MatrixBase {
|
|
|
254
270
|
}
|
|
255
271
|
}
|
|
256
272
|
|
|
273
|
+
/** One-line summary: `rows`×`cols` followed by `dtypeToString(this.dtype)`. */
toString(): string {
    const shape = `${this.rows}\u00d7${this.cols}`;
    const dtypeName = dtypeToString(this.dtype);
    return `Matrix(${shape}, ${dtypeName})`;
}

/** Formatter registered under the `nodejs.util.inspect.custom` symbol; yields the `toString()` text. */
[Symbol.for('nodejs.util.inspect.custom')](): string {
    const summary = this.toString();
    return summary;
}
|
|
280
|
+
|
|
257
281
|
static fromTypedArray(array: TypedArray, rows: number, cols: number, dtype?: DType): Matrix {
|
|
258
282
|
const d = dtype ?? inferDtype(array);
|
|
259
283
|
const buf = (array.buffer as ArrayBuffer).slice(array.byteOffset, array.byteOffset + array.byteLength);
|
|
@@ -302,6 +326,14 @@ export class PackedMatrix {
|
|
|
302
326
|
|
|
303
327
|
/** Sets the internal `_disposed` flag to true. */
dispose(): void {
    this._disposed = true;
}

/** Current value of the internal `_disposed` flag. */
get disposed(): boolean {
    return this._disposed;
}
|
|
329
|
+
|
|
330
|
+
/** One-line summary: `width`×`depth`, `dtypeToString(this.dtype)`, and `byteLength` in bytes. */
toString(): string {
    const shape = `${this.width}\u00d7${this.depth}`;
    const dtypeName = dtypeToString(this.dtype);
    return `PackedMatrix(${shape}, ${dtypeName}, ${this.byteLength} bytes)`;
}

/** Formatter registered under the `nodejs.util.inspect.custom` symbol; yields the `toString()` text. */
[Symbol.for('nodejs.util.inspect.custom')](): string {
    const summary = this.toString();
    return summary;
}
|
|
305
337
|
}
|
|
306
338
|
|
|
307
339
|
/** @brief Kernel family identifiers for output dtype resolution. */
|
|
@@ -389,6 +421,22 @@ export class Float16Array extends Uint16Array {
|
|
|
389
421
|
}
|
|
390
422
|
this[index] = conversionFunctions.castF32ToF16(value);
|
|
391
423
|
}
|
|
424
|
+
|
|
425
|
+
/**
 * Renders the array as `Float16Array(length) [value [0xbits], ...]`.
 *
 * At most the first 20 elements are listed, each as the `castF16ToF32` result
 * followed by the stored 16-bit pattern in zero-padded hex; `, ...` is appended
 * when the array holds more than 20 elements. When `conversionFunctions` is
 * falsy only the `Float16Array(length)` header is returned.
 */
toString(): string {
    const total = this.length;
    if (!conversionFunctions) {
        return `Float16Array(${total})`;
    }
    const shown = Math.min(total, 20);
    const rendered: string[] = [];
    for (let index = 0; index < shown; index += 1) {
        const bits = this[index];
        const value = conversionFunctions.castF16ToF32(bits);
        rendered.push(`${value} [0x${bits.toString(16).padStart(4, '0')}]`);
    }
    const tail = total > 20 ? ', ...' : '';
    return `Float16Array(${total}) [${rendered.join(', ')}${tail}]`;
}

/** Formatter registered under the `nodejs.util.inspect.custom` symbol; yields the `toString()` text. */
[Symbol.for('nodejs.util.inspect.custom')](): string {
    const summary = this.toString();
    return summary;
}
|
|
392
440
|
}
|
|
393
441
|
|
|
394
442
|
/**
|
|
@@ -443,6 +491,22 @@ export class BFloat16Array extends Uint16Array {
|
|
|
443
491
|
}
|
|
444
492
|
this[index] = conversionFunctions.castF32ToBF16(value);
|
|
445
493
|
}
|
|
494
|
+
|
|
495
|
+
/**
 * Renders the array as `BFloat16Array(length) [value [0xbits], ...]`.
 *
 * At most the first 20 elements are listed, each as the `castBF16ToF32` result
 * followed by the stored 16-bit pattern in zero-padded hex; `, ...` is appended
 * when the array holds more than 20 elements. When `conversionFunctions` is
 * falsy only the `BFloat16Array(length)` header is returned.
 */
toString(): string {
    const total = this.length;
    if (!conversionFunctions) {
        return `BFloat16Array(${total})`;
    }
    const shown = Math.min(total, 20);
    const rendered: string[] = [];
    for (let index = 0; index < shown; index += 1) {
        const bits = this[index];
        const value = conversionFunctions.castBF16ToF32(bits);
        rendered.push(`${value} [0x${bits.toString(16).padStart(4, '0')}]`);
    }
    const tail = total > 20 ? ', ...' : '';
    return `BFloat16Array(${total}) [${rendered.join(', ')}${tail}]`;
}

/** Formatter registered under the `nodejs.util.inspect.custom` symbol; yields the `toString()` text. */
[Symbol.for('nodejs.util.inspect.custom')](): string {
    const summary = this.toString();
    return summary;
}
|
|
446
510
|
}
|
|
447
511
|
|
|
448
512
|
/**
|
|
@@ -496,6 +560,22 @@ export class E4M3Array extends Uint8Array {
|
|
|
496
560
|
}
|
|
497
561
|
this[index] = conversionFunctions.castF32ToE4M3(value);
|
|
498
562
|
}
|
|
563
|
+
|
|
564
|
+
/**
 * Renders the array as `E4M3Array(length) [value [0xbits], ...]`.
 *
 * At most the first 20 elements are listed, each as the `castE4M3ToF32` result
 * followed by the stored byte in two-digit zero-padded hex; `, ...` is appended
 * when the array holds more than 20 elements. When `conversionFunctions` is
 * falsy only the `E4M3Array(length)` header is returned.
 */
toString(): string {
    const total = this.length;
    if (!conversionFunctions) {
        return `E4M3Array(${total})`;
    }
    const shown = Math.min(total, 20);
    const rendered: string[] = [];
    for (let index = 0; index < shown; index += 1) {
        const bits = this[index];
        const value = conversionFunctions.castE4M3ToF32(bits);
        rendered.push(`${value} [0x${bits.toString(16).padStart(2, '0')}]`);
    }
    const tail = total > 20 ? ', ...' : '';
    return `E4M3Array(${total}) [${rendered.join(', ')}${tail}]`;
}

/** Formatter registered under the `nodejs.util.inspect.custom` symbol; yields the `toString()` text. */
[Symbol.for('nodejs.util.inspect.custom')](): string {
    const summary = this.toString();
    return summary;
}
|
|
499
579
|
}
|
|
500
580
|
|
|
501
581
|
/**
|
|
@@ -549,6 +629,22 @@ export class E5M2Array extends Uint8Array {
|
|
|
549
629
|
}
|
|
550
630
|
this[index] = conversionFunctions.castF32ToE5M2(value);
|
|
551
631
|
}
|
|
632
|
+
|
|
633
|
+
/**
 * Renders the array as `E5M2Array(length) [value [0xbits], ...]`.
 *
 * At most the first 20 elements are listed, each as the `castE5M2ToF32` result
 * followed by the stored byte in two-digit zero-padded hex; `, ...` is appended
 * when the array holds more than 20 elements. When `conversionFunctions` is
 * falsy only the `E5M2Array(length)` header is returned.
 */
toString(): string {
    const total = this.length;
    if (!conversionFunctions) {
        return `E5M2Array(${total})`;
    }
    const shown = Math.min(total, 20);
    const rendered: string[] = [];
    for (let index = 0; index < shown; index += 1) {
        const bits = this[index];
        const value = conversionFunctions.castE5M2ToF32(bits);
        rendered.push(`${value} [0x${bits.toString(16).padStart(2, '0')}]`);
    }
    const tail = total > 20 ? ', ...' : '';
    return `E5M2Array(${total}) [${rendered.join(', ')}${tail}]`;
}

/** Formatter registered under the `nodejs.util.inspect.custom` symbol; yields the `toString()` text. */
[Symbol.for('nodejs.util.inspect.custom')](): string {
    const summary = this.toString();
    return summary;
}
|
|
552
648
|
}
|
|
553
649
|
|
|
554
650
|
/**
|
|
@@ -636,6 +732,20 @@ export class BinaryArray extends Uint8Array {
|
|
|
636
732
|
}
|
|
637
733
|
return binary;
|
|
638
734
|
}
|
|
735
|
+
|
|
736
|
+
/**
 * Renders the array as `BinaryArray(bitLength) [0bxxxxxxxx, ...]`.
 *
 * The header shows `_bitLength`; at most the first 20 bytes are listed, each as
 * an 8-digit zero-padded binary literal, and `, ...` is appended when the array
 * holds more than 20 bytes.
 */
toString(): string {
    const byteCount = this.length;
    const shown = Math.min(byteCount, 20);
    const rendered: string[] = [];
    for (let index = 0; index < shown; index += 1) {
        const bitString = this[index].toString(2).padStart(8, '0');
        rendered.push(`0b${bitString}`);
    }
    const tail = byteCount > 20 ? ', ...' : '';
    return `BinaryArray(${this._bitLength}) [${rendered.join(', ')}${tail}]`;
}

/** Formatter registered under the `nodejs.util.inspect.custom` symbol; yields the `toString()` text. */
[Symbol.for('nodejs.util.inspect.custom')](): string {
    const summary = this.toString();
    return summary;
}
|
|
639
749
|
}
|
|
640
750
|
|
|
641
751
|
/**
|
package/numkong.gypi
ADDED
|
@@ -0,0 +1,101 @@
|
|
|
1
|
+
# NumKong GYP include for downstream native addons.
|
|
2
|
+
#
|
|
3
|
+
# Usage in your binding.gyp:
|
|
4
|
+
#
|
|
5
|
+
# {
|
|
6
|
+
# "includes": ["<!(node -p \"require.resolve('numkong/numkong.gypi')\")"],
|
|
7
|
+
# "targets": [{
|
|
8
|
+
# "target_name": "my_addon",
|
|
9
|
+
# "dependencies": ["numkong_lib"],
|
|
10
|
+
# "sources": ["my_addon.c"],
|
|
11
|
+
# }]
|
|
12
|
+
# }
|
|
13
|
+
#
|
|
14
|
+
{
    "variables": {
        # Directory containing numkong's package.json as resolved by Node; falls back to the
        # literal "numkong" when resolution throws. The trailing "%" marks this as a default value.
        "numkong_root%": "<!(node -e \"try{console.log(require('path').dirname(require.resolve('numkong/package.json')))}catch{console.log('numkong')}\")",
    },
    "targets": [
        {
            "target_name": "numkong_lib",
            "type": "static_library",

            # Runs probes/probe.js; the declared output is nk_probes.h inside numkong_root.
            "actions": [
                {
                    "action_name": "numkong_probe",
                    "inputs": ["<(numkong_root)/probes/probe.js"],
                    "outputs": ["<!(node -e \"console.log(require('path').resolve('<(numkong_root)','nk_probes.h'))\")"],
                    "action": ["node", "<(numkong_root)/probes/probe.js"],
                    "message": "Probing ISA capabilities for NumKong",
                },
            ],

            # Library entry point followed by one dispatch translation unit per data type.
            "sources": [
                "<(numkong_root)/c/numkong.c",
                "<(numkong_root)/c/dispatch_f64.c",
                "<(numkong_root)/c/dispatch_f32.c",
                "<(numkong_root)/c/dispatch_f16.c",
                "<(numkong_root)/c/dispatch_bf16.c",
                "<(numkong_root)/c/dispatch_i8.c",
                "<(numkong_root)/c/dispatch_u8.c",
                "<(numkong_root)/c/dispatch_u1.c",
                "<(numkong_root)/c/dispatch_e4m3.c",
                "<(numkong_root)/c/dispatch_e5m2.c",
                "<(numkong_root)/c/dispatch_other.c",
                "<(numkong_root)/c/dispatch_f64c.c",
                "<(numkong_root)/c/dispatch_f32c.c",
                "<(numkong_root)/c/dispatch_f16c.c",
                "<(numkong_root)/c/dispatch_bf16c.c",
                "<(numkong_root)/c/dispatch_i16.c",
                "<(numkong_root)/c/dispatch_i32.c",
                "<(numkong_root)/c/dispatch_i64.c",
                "<(numkong_root)/c/dispatch_u16.c",
                "<(numkong_root)/c/dispatch_u32.c",
                "<(numkong_root)/c/dispatch_u64.c",
                "<(numkong_root)/c/dispatch_i4.c",
                "<(numkong_root)/c/dispatch_u4.c",
                "<(numkong_root)/c/dispatch_e2m3.c",
                "<(numkong_root)/c/dispatch_e3m2.c",
            ],
            "include_dirs": ["<(numkong_root)/include"],
            "defines": [
                "NK_NATIVE_F16=0",
                "NK_NATIVE_BF16=0",
                "NK_DYNAMIC_DISPATCH=1",
            ],

            # The final two entries force-include nk_probes.h into every translation unit.
            # NOTE(review): gyp's "cflags" list is generally not applied by the macOS (Xcode)
            # flavor; confirm whether an "xcode_settings" -> "OTHER_CFLAGS" entry is needed
            # for the forced include on that platform.
            "cflags": [
                "-std=c11",
                "-O3",
                "-Wno-unknown-pragmas",
                "-Wno-maybe-uninitialized",
                "-Wno-cast-function-type",
                "-Wno-switch",
                "-Wno-psabi",
                "-include",
                "<!(node -e \"console.log(require('path').resolve('<(numkong_root)','nk_probes.h'))\")",
            ],

            # MSVC counterpart of the forced include above.
            "msvs_settings": {
                "VCCLCompilerTool": {
                    "ForcedIncludeFiles": [
                        "<!(node -e \"console.log(require('path').resolve('<(numkong_root)','nk_probes.h'))\")",
                    ],
                },
            },
            "conditions": [
                [
                    "OS=='mac'",
                    {
                        "xcode_settings": {
                            "MACOSX_DEPLOYMENT_TARGET": "11.0",
                        },
                    },
                ],
            ],

            # Targets that depend on numkong_lib directly also get the NumKong include directory.
            "direct_dependent_settings": {
                "include_dirs": ["<(numkong_root)/include"],
            },
        },
    ],
}
|
package/package.json
CHANGED
|
@@ -1,10 +1,14 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "numkong",
|
|
3
|
-
"version": "7.
|
|
4
|
-
"description": "Portable mixed-precision
|
|
3
|
+
"version": "7.4.2",
|
|
4
|
+
"description": "Portable mixed-precision math, linear-algebra, & retrieval library with 2000+ SIMD kernels for x86, Arm, RISC-V, LoongArch, Power, & WebAssembly",
|
|
5
5
|
"homepage": "https://github.com/ashvardanian/NumKong",
|
|
6
6
|
"author": "Ash Vardanian",
|
|
7
|
-
"license": "Apache
|
|
7
|
+
"license": "Apache-2.0",
|
|
8
|
+
"repository": {
|
|
9
|
+
"type": "git",
|
|
10
|
+
"url": "https://github.com/ashvardanian/NumKong"
|
|
11
|
+
},
|
|
8
12
|
"keywords": [
|
|
9
13
|
"vector",
|
|
10
14
|
"distance",
|
|
@@ -22,11 +26,10 @@
|
|
|
22
26
|
"f16"
|
|
23
27
|
],
|
|
24
28
|
"scripts": {
|
|
29
|
+
"preinstall": "node probes/probe.js",
|
|
25
30
|
"install": "node-gyp-build",
|
|
26
|
-
"prebuild-single": "prebuildify --napi --strip --target=22.0.0",
|
|
27
|
-
"prebuild-arm64": "prebuildify --arch arm64 --napi --strip --target=22.0.0",
|
|
28
|
-
"prebuild-darwin-x64+arm64": "prebuildify --arch arm64+x64 --napi --strip --target=22.0.0",
|
|
29
31
|
"build-js": "rm -fr javascript/dist/* && tsc -p javascript/tsconfig-esm.json && tsc -p javascript/tsconfig-cjs.json && cp javascript/dist-package-esm.json javascript/dist/esm/package.json && cp javascript/dist-package-cjs.json javascript/dist/cjs/package.json",
|
|
32
|
+
"build-browser": "esbuild javascript/dist/esm/numkong-browser.js --bundle --format=esm --platform=browser --target=es2022 --outfile=build-wasm/numkong-bundle.js",
|
|
30
33
|
"test": "node --test ./test/test.mjs",
|
|
31
34
|
"test:bun": "bun test ./test/test.mjs",
|
|
32
35
|
"test:deno": "deno test -A --no-check",
|
|
@@ -49,7 +52,13 @@
|
|
|
49
52
|
".": {
|
|
50
53
|
"import": "./javascript/dist/esm/numkong.js",
|
|
51
54
|
"require": "./javascript/dist/cjs/numkong.js"
|
|
52
|
-
}
|
|
55
|
+
},
|
|
56
|
+
"./wasm": {
|
|
57
|
+
"import": "./wasm/numkong.js",
|
|
58
|
+
"types": "./wasm/numkong.d.ts"
|
|
59
|
+
},
|
|
60
|
+
"./numkong.gypi": "./numkong.gypi",
|
|
61
|
+
"./package.json": "./package.json"
|
|
53
62
|
},
|
|
54
63
|
"engines": {
|
|
55
64
|
"node": ">=22.0.0"
|
|
@@ -65,23 +74,35 @@
|
|
|
65
74
|
"@types/node": "^24.10.0",
|
|
66
75
|
"node-gyp": "^11.5.0",
|
|
67
76
|
"playwright": "^1.58.0",
|
|
68
|
-
"prebuildify": "^6.0.1",
|
|
69
77
|
"http-server": "^14.1.1",
|
|
70
|
-
"
|
|
78
|
+
"esbuild": "^0.25.0",
|
|
79
|
+
"typescript": "^5.9.3",
|
|
80
|
+
"benchmark": "^2.1.4",
|
|
81
|
+
"mathjs": "^14.9.0",
|
|
82
|
+
"usearch": "^2.21.0"
|
|
71
83
|
},
|
|
72
84
|
"files": [
|
|
73
85
|
"c/",
|
|
74
86
|
"include/",
|
|
75
|
-
"javascript/",
|
|
87
|
+
"javascript/dist/",
|
|
88
|
+
"javascript/*.ts",
|
|
89
|
+
"javascript/*.json",
|
|
90
|
+
"javascript/*.c",
|
|
91
|
+
"probes/",
|
|
92
|
+
"wasm/",
|
|
76
93
|
"binding.gyp",
|
|
94
|
+
"numkong.gypi",
|
|
77
95
|
"LICENSE"
|
|
78
96
|
],
|
|
79
97
|
"prettier": {
|
|
80
98
|
"printWidth": 120
|
|
81
99
|
},
|
|
82
100
|
"optionalDependencies": {
|
|
83
|
-
"
|
|
84
|
-
"
|
|
85
|
-
"
|
|
101
|
+
"@numkong/darwin-arm64": "7.4.2",
|
|
102
|
+
"@numkong/darwin-x64": "7.4.2",
|
|
103
|
+
"@numkong/linux-arm64": "7.4.2",
|
|
104
|
+
"@numkong/linux-x64": "7.4.2",
|
|
105
|
+
"@numkong/win32-arm64": "7.4.2",
|
|
106
|
+
"@numkong/win32-x64": "7.4.2"
|
|
86
107
|
}
|
|
87
108
|
}
|
|
package/probes/arm_neon_bfdot.c
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
/* NumKong ISA probe: NEON BF16 (ARMv8.6-A bfloat16 dot product) */
|
|
2
|
+
#include <arm_neon.h>
|
|
3
|
+
/* Runs one BF16 dot product over broadcast 1.0 and 2.0 vectors into a zeroed
 * accumulator. Exit status is 0 when lane 0 of the result is positive, 1 otherwise. */
int main(void) {
    const bfloat16x8_t ones = vdupq_n_bf16(1.0f);
    const bfloat16x8_t twos = vdupq_n_bf16(2.0f);
    const float32x4_t sums = vbfdotq_f32(vdupq_n_f32(0.0f), ones, twos);
    if (vgetq_lane_f32(sums, 0) > 0.0f) {
        return 0;
    }
    return 1;
}
|
|
package/probes/arm_neon_fhm.c
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
/* NumKong ISA probe: NEON FHM (ARMv8.2-A FP16 fused multiply-add) */
|
|
2
|
+
#include <arm_neon.h>
|
|
3
|
+
/* Runs one widening FP16 multiply-accumulate (low half) over broadcast 1.0 and
 * 2.0 vectors into a zeroed f32 accumulator. Exit status is 0 when lane 0 of the
 * result is positive, 1 otherwise. */
int main(void) {
    const float16x8_t ones = vdupq_n_f16(1.0f);
    const float16x8_t twos = vdupq_n_f16(2.0f);
    const float32x4_t sums = vfmlalq_low_f16(vdupq_n_f32(0.0f), ones, twos);
    if (vgetq_lane_f32(sums, 0) > 0.0f) {
        return 0;
    }
    return 1;
}
|
|
package/probes/arm_neon_sdot.c
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
/* NumKong ISA probe: NEON SDOT (ARMv8.2-A dot product) */
|
|
2
|
+
#include <arm_neon.h>
|
|
3
|
+
/* Runs one signed 8-bit dot product over broadcast 1 and 2 vectors into a zeroed
 * i32 accumulator. Exit status is 0 when lane 0 of the result is positive, 1 otherwise. */
int main(void) {
    const int8x16_t ones = vdupq_n_s8(1);
    const int8x16_t twos = vdupq_n_s8(2);
    const int32x4_t sums = vdotq_s32(vdupq_n_s32(0), ones, twos);
    if (vgetq_lane_s32(sums, 0) > 0) {
        return 0;
    }
    return 1;
}
|
|
package/probes/arm_neonfp8.c
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
/* NumKong ISA probe: NEON FP8 (fp8dot4) */
|
|
2
|
+
#include <arm_neon.h>
|
|
3
|
+
/* Exercises the 128-bit FP8 dot-product intrinsic on all-zero inputs.
 *
 * The operands here are 128-bit vectors (float32x4_t accumulator, mfloat8x16_t
 * sources), so the q-suffixed intrinsic is required; the non-q
 * vdot_f32_mf8_fpm form takes 64-bit float32x2_t / mfloat8x8_t operands and
 * does not accept these types.
 *
 * Returns 0 when lane 0 of the accumulator is still 0.0f after the dot product,
 * 1 otherwise. */
int test_neonfp8(void) {
    mfloat8x16_t zeros = vreinterpretq_mf8_u8(vdupq_n_u8(0));
    float32x4_t acc = vdupq_n_f32(0.0f);
    acc = vdotq_f32_mf8_fpm(acc, zeros, zeros, 0);
    return vgetq_lane_f32(acc, 0) == 0.0f ? 0 : 1;
}

/* Probe entry point: the exit status is the result of test_neonfp8(). */
int main(void) { return test_neonfp8(); }
|
package/probes/arm_sme.c
ADDED
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
/* NumKong ISA probe: SME (Scalable Matrix Extension) */
|
|
2
|
+
#if defined(_WIN32)
|
|
3
|
+
#error "SVE/SME not supported on Windows ARM"
|
|
4
|
+
#endif
|
|
5
|
+
|
|
6
|
+
#if !defined(__ARM_FEATURE_SME)
|
|
7
|
+
#error "Feature not available"
|
|
8
|
+
#endif
|
|
9
|
+
#include <arm_sme.h>
|
|
10
|
+
/* Issues a single f32 outer-product accumulate into ZA tile 0 with an all-true
 * predicate and a broadcast 1.0 vector. Always returns 0. */
__arm_new("za") __arm_locally_streaming int test_sme(void) {
    svbool_t all_lanes = svptrue_b32();
    svfloat32_t ones = svdup_f32(1.0f);
    svmopa_za32_f32_m(0, all_lanes, all_lanes, ones, ones);
    return 0;
}

/* Probe entry point: the exit status is the result of test_sme(). */
int main(void) {
    return test_sme();
}
|
|
package/probes/arm_sme2.c
ADDED
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
/* NumKong ISA probe: SME2 */
|
|
2
|
+
#if defined(_WIN32)
|
|
3
|
+
#error "SVE/SME not supported on Windows ARM"
|
|
4
|
+
#endif
|
|
5
|
+
|
|
6
|
+
#if !defined(__ARM_FEATURE_SME2)
|
|
7
|
+
#error "Feature not available"
|
|
8
|
+
#endif
|
|
9
|
+
#include <arm_sme.h>
|
|
10
|
+
/* Issues a single f32 outer-product accumulate into ZA tile 0 with an all-true
 * predicate and a broadcast 1.0 vector. Always returns 0. */
__arm_new("za") __arm_locally_streaming int test_sme2(void) {
    svbool_t all_lanes = svptrue_b32();
    svfloat32_t ones = svdup_f32(1.0f);
    svmopa_za32_f32_m(0, all_lanes, all_lanes, ones, ones);
    return 0;
}

/* Probe entry point: the exit status is the result of test_sme2(). */
int main(void) {
    return test_sme2();
}
|
|
package/probes/arm_sme2p1.c
ADDED
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
/* NumKong ISA probe: SME2P1 */
#ifdef _WIN32
#error "SVE/SME not supported on Windows ARM"
#endif

/* NOTE(review): the guard below tests __ARM_FEATURE_SME2 and the body only
 * calls svmopa_za32_f32_m, so nothing in this file is specific to SME2p1.
 * Presumably the build system supplies the 2p1 target flag - confirm. */
#ifndef __ARM_FEATURE_SME2
#error "Feature not available"
#endif
#include <arm_sme.h>

/*
 * Performs one f32 outer-product accumulate (tile argument 0) under an
 * all-true predicate and unconditionally returns 0.
 */
__arm_new("za") __arm_locally_streaming int test_sme2p1(void) {
    svbool_t active = svptrue_b32();
    svfloat32_t splat = svdup_f32(1.0f);
    svmopa_za32_f32_m(0, active, active, splat, splat);
    return 0;
}

/* The process exit status is whatever the test function returns. */
int main(void) {
    return test_sme2p1();
}
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
/* NumKong ISA probe: SME BF16 (BFloat16 outer product) */
#ifdef _WIN32
#error "SVE/SME not supported on Windows ARM"
#endif

#ifndef __ARM_FEATURE_SME
#error "Feature not available"
#endif
#include <arm_sme.h>

/*
 * Performs one bf16 outer-product accumulate (tile argument 0) on an all-zero
 * vector, with a 16-bit all-true predicate on both operands.
 * Unconditionally returns 0.
 */
__arm_new("za") __arm_locally_streaming int test_smebf16(void) {
    svbool_t all_halfwords = svptrue_b16();
    svbfloat16_t zeros = svdup_bf16(0.0f);
    svmopa_za32_bf16_m(0, all_halfwords, all_halfwords, zeros, zeros);
    return 0;
}

/* The process exit status is whatever the test function returns. */
int main(void) {
    return test_smebf16();
}
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
/* NumKong ISA probe: SME BI32 (boolean/integer 32-bit outer product) */
#ifdef _WIN32
#error "SVE/SME not supported on Windows ARM"
#endif

#ifndef __ARM_FEATURE_SME2
#error "Feature not available"
#endif
#include <arm_sme.h>

/*
 * Performs one svbmopa_za32_u32_m call (tile argument 0) on a vector of
 * 32-bit ones, with an all-true predicate on both operands.
 * Unconditionally returns 0.
 */
__arm_new("za") __arm_locally_streaming int test_smebi32(void) {
    svbool_t all_words = svptrue_b32();
    svuint32_t bits = svdup_u32(1);
    svbmopa_za32_u32_m(0, all_words, all_words, bits, bits);
    return 0;
}

/* The process exit status is whatever the test function returns. */
int main(void) {
    return test_smebi32();
}
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
/* NumKong ISA probe: SME F64 (FEAT_SME_F64F64) */
#ifdef _WIN32
#error "SVE/SME not supported on Windows ARM"
#endif

#ifndef __ARM_FEATURE_SME
#error "Feature not available"
#endif
#include <arm_sme.h>

/*
 * Performs one f64 outer-product accumulate (tile argument 0) on a vector of
 * 1.0 values, with a 64-bit all-true predicate on both operands.
 * Unconditionally returns 0.
 */
__arm_new("za") __arm_locally_streaming int test_smef64(void) {
    svbool_t all_doubles = svptrue_b64();
    svfloat64_t ones = svdup_f64(1.0);
    svmopa_za64_f64_m(0, all_doubles, all_doubles, ones, ones);
    return 0;
}

/* The process exit status is whatever the test function returns. */
int main(void) {
    return test_smef64();
}
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
/* NumKong ISA probe: SME FA64 (FEAT_SME_FA64, full SVE2 in streaming mode) */
#ifdef _WIN32
#error "SVE/SME not supported on Windows ARM"
#endif

#ifndef __ARM_FEATURE_SME
#error "Feature not available"
#endif
#include <arm_sme.h>

/*
 * Sums a vector of 1.0f values across all lanes inside a locally-streaming
 * function. Returns 0 when the sum, truncated to int, is positive,
 * and 1 otherwise.
 */
__arm_locally_streaming int test_smefa64(void) {
    svfloat32_t ones = svdup_f32(1.0f);
    int lane_sum = (int)svaddv_f32(svptrue_b32(), ones);
    if (lane_sum > 0) {
        return 0;
    }
    return 1;
}

/* The process exit status is whatever the test function returns. */
int main(void) {
    return test_smefa64();
}
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
/* NumKong ISA probe: SME F16 (FEAT_SME_F16F16) */
#ifdef _WIN32
#error "SVE/SME not supported on Windows ARM"
#endif

#ifndef __ARM_FEATURE_SME
#error "Feature not available"
#endif
#include <arm_sme.h>

/*
 * Performs one f16 outer-product accumulate (tile argument 0) on a vector of
 * half-precision ones, with a 16-bit all-true predicate on both operands.
 * Unconditionally returns 0.
 *
 * NOTE(review): the intrinsic used here accumulates into a za32 tile; whether
 * that matches the FEAT_SME_F16F16 feature named in the header should be
 * confirmed.
 */
__arm_new("za") __arm_locally_streaming int test_smehalf(void) {
    svbool_t all_halves = svptrue_b16();
    svfloat16_t ones = svdup_f16((__fp16)1.0f);
    svmopa_za32_f16_m(0, all_halves, all_halves, ones, ones);
    return 0;
}

/* The process exit status is whatever the test function returns. */
int main(void) {
    return test_smehalf();
}
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
/* NumKong ISA probe: SME LUT2 (FEAT_SME_LUTv2) */
#ifdef _WIN32
#error "SVE/SME not supported on Windows ARM"
#endif

#ifndef __ARM_FEATURE_SME2
#error "Feature not available"
#endif
#include <arm_sme.h>

/*
 * Performs one svluti2_lane_zt_u8 lookup with an all-zero index vector and
 * adds up the bytes of the result. Returns 0 when that sum is zero,
 * and 1 otherwise.
 */
__arm_new("zt0") __arm_locally_streaming int test_smelut2(void) {
    svuint8_t indices = svdup_u8(0);
    svuint8_t looked_up = svluti2_lane_zt_u8(0, indices, 0);
    int byte_sum = (int)svaddv_u8(svptrue_b8(), looked_up);
    return byte_sum != 0;
}

/* The process exit status is whatever the test function returns. */
int main(void) {
    return test_smelut2();
}
|
package/probes/arm_sve.c
ADDED
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
/* NumKong ISA probe: SVE (Scalable Vector Extension) */
#ifdef _WIN32
#error "SVE/SME not supported on Windows ARM"
#endif

#if defined(__aarch64__) && defined(__APPLE__)
#error "SVE not available on Apple Silicon"
#endif

#ifndef __ARM_FEATURE_SVE
#error "Feature not available"
#endif
#include <arm_sve.h>

/*
 * Adds up every lane of a vector filled with 1.0f and returns the sum
 * truncated to int.
 */
int test_sve(void) {
    svfloat32_t ones = svdup_f32(1.0f);
    float lane_sum = svaddv_f32(svptrue_b32(), ones);
    return (int)lane_sum;
}

/* Exits with 0 when the lane sum is positive, and with 1 otherwise. */
int main(void) {
    if (test_sve() > 0) {
        return 0;
    }
    return 1;
}
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
/* NumKong ISA probe: SVE2 */
#ifdef _WIN32
#error "SVE/SME not supported on Windows ARM"
#endif

#if defined(__aarch64__) && defined(__APPLE__)
#error "SVE not available on Apple Silicon"
#endif

#ifndef __ARM_FEATURE_SVE2
#error "Feature not available"
#endif
#include <arm_sve.h>

/*
 * Multiplies a vector of 2s by a vector of 3s under an all-true predicate and
 * sums the products. Returns 0 when the sum, truncated to int, is positive,
 * and 1 otherwise.
 */
int test_sve2(void) {
    svint32_t twos = svdup_s32(2);
    svint32_t threes = svdup_s32(3);
    svint32_t products = svmul_s32_z(svptrue_b32(), twos, threes);
    int total = (int)svaddv_s32(svptrue_b32(), products);
    if (total > 0) {
        return 0;
    }
    return 1;
}

/* The process exit status is whatever the test function returns. */
int main(void) {
    return test_sve2();
}
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
/* NumKong ISA probe: SVE2P1 */
#ifdef _WIN32
#error "SVE/SME not supported on Windows ARM"
#endif

#if defined(__aarch64__) && defined(__APPLE__)
#error "SVE not available on Apple Silicon"
#endif

/* NOTE(review): the guard below tests __ARM_FEATURE_SVE2 and the body only
 * uses svdup_f32 / svaddv_f32, so nothing in this file is specific to SVE2p1.
 * Presumably the build system supplies the 2p1 target flag - confirm. */
#ifndef __ARM_FEATURE_SVE2
#error "Feature not available"
#endif
#include <arm_sve.h>

/*
 * Sums every lane of a vector filled with 1.0f. Returns 0 when the sum,
 * truncated to int, is positive, and 1 otherwise.
 */
int test_sve2p1(void) {
    svfloat32_t ones = svdup_f32(1.0f);
    int lane_sum = (int)svaddv_f32(svptrue_b32(), ones);
    if (lane_sum > 0) {
        return 0;
    }
    return 1;
}

/* The process exit status is whatever the test function returns. */
int main(void) {
    return test_sve2p1();
}
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
/* NumKong ISA probe: SVE BF16 (FEAT_BF16 dot-product) */
#ifdef _WIN32
#error "SVE/SME not supported on Windows ARM"
#endif

#if defined(__aarch64__) && defined(__APPLE__)
#error "SVE not available on Apple Silicon"
#endif

#ifndef __ARM_FEATURE_SVE
#error "Feature not available"
#endif
#include <arm_sve.h>

/*
 * Accumulates one bf16 dot product of an all-zero vector with itself onto a
 * zero f32 accumulator, then sums the accumulator lanes. Returns 0 when the
 * sum, truncated to int, is zero, and 1 otherwise.
 */
int test_svebfdot(void) {
    svbfloat16_t zeros = svdup_bf16(0.0f);
    svfloat32_t sums = svbfdot_f32(svdup_f32(0.0f), zeros, zeros);
    int total = (int)svaddv_f32(svptrue_b32(), sums);
    return total != 0;
}

/* The process exit status is whatever the test function returns. */
int main(void) {
    return test_svebfdot();
}
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
/* NumKong ISA probe: SVE F16 (half-precision) */
#ifdef _WIN32
#error "SVE/SME not supported on Windows ARM"
#endif

#if defined(__aarch64__) && defined(__APPLE__)
#error "SVE not available on Apple Silicon"
#endif

#ifndef __ARM_FEATURE_SVE
#error "Feature not available"
#endif
#include <arm_sve.h>

/*
 * Adds up every lane of a vector filled with half-precision 1.0 and returns
 * the sum converted to int.
 */
int test_svehalf(void) {
    svfloat16_t ones = svdup_f16((__fp16)1.0f);
    return (int)svaddv_f16(svptrue_b16(), ones);
}

/* Exits with 0 when the lane sum is positive, and with 1 otherwise. */
int main(void) {
    if (test_svehalf() > 0) {
        return 0;
    }
    return 1;
}
|