numkong 7.4.3 → 7.4.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +49 -49
- package/binding.gyp +3 -0
- package/include/numkong/capabilities.h +1 -1
- package/include/numkong/each/haswell.h +4 -4
- package/include/numkong/maxsim/sme.h +65 -27
- package/include/numkong/mesh/README.md +13 -27
- package/include/numkong/mesh/haswell.h +25 -122
- package/include/numkong/mesh/neon.h +21 -110
- package/include/numkong/mesh/neonbfdot.h +4 -43
- package/include/numkong/mesh/rvv.h +7 -82
- package/include/numkong/mesh/serial.h +26 -53
- package/include/numkong/mesh/skylake.h +7 -123
- package/include/numkong/mesh/v128relaxed.h +9 -93
- package/include/numkong/mesh.h +2 -2
- package/include/numkong/mesh.hpp +35 -96
- package/include/numkong/types.h +15 -9
- package/numkong.gypi +3 -0
- package/package.json +7 -7
- package/wasm/numkong.wasm +0 -0
package/include/numkong/types.h
CHANGED
|
@@ -119,6 +119,12 @@
|
|
|
119
119
|
#define NK_MAY_ALIAS_
|
|
120
120
|
#endif
|
|
121
121
|
|
|
122
|
+
#if defined(__has_builtin)
|
|
123
|
+
#define nk_has_builtin_(x) __has_builtin(x)
|
|
124
|
+
#else
|
|
125
|
+
#define nk_has_builtin_(x) 0
|
|
126
|
+
#endif
|
|
127
|
+
|
|
122
128
|
// Allow SIMD kernels to redirect small inputs to serial implementations.
|
|
123
129
|
// Enabled by default for production use. Tests and benchmarks may disable
|
|
124
130
|
// this to isolate SIMD path behavior on small inputs.
|
|
@@ -425,7 +431,7 @@
|
|
|
425
431
|
// AppleClang 17 exposes SME sub-features through `arm_sme.h` builtin aliases,
|
|
426
432
|
// not dedicated `__ARM_FEATURE_*` predefines for every matrix subtype.
|
|
427
433
|
#if !defined(NK_TARGET_SMEF64) || (NK_TARGET_SMEF64 && !NK_TARGET_ARM64_)
|
|
428
|
-
#if defined(__ARM_FEATURE_SME_F64F64) || (
|
|
434
|
+
#if defined(__ARM_FEATURE_SME_F64F64) || nk_has_builtin_(__builtin_sme_svmopa_za64_f64_m)
|
|
429
435
|
#define NK_TARGET_SMEF64 1
|
|
430
436
|
#else
|
|
431
437
|
#undef NK_TARGET_SMEF64
|
|
@@ -434,39 +440,39 @@
|
|
|
434
440
|
#endif // !defined(NK_TARGET_SMEF64) || ...
|
|
435
441
|
|
|
436
442
|
#if !defined(NK_TARGET_SMEBI32) || (NK_TARGET_SMEBI32 && !NK_TARGET_ARM64_)
|
|
437
|
-
#if
|
|
443
|
+
#if nk_has_builtin_(__builtin_sme_svbmopa_za32_u32_m)
|
|
438
444
|
#define NK_TARGET_SMEBI32 1
|
|
439
445
|
#else
|
|
440
446
|
#undef NK_TARGET_SMEBI32
|
|
441
447
|
#define NK_TARGET_SMEBI32 0
|
|
442
|
-
#endif //
|
|
448
|
+
#endif // nk_has_builtin_(__builtin_sme_svbmopa_za32_u32_m)
|
|
443
449
|
#endif // !defined(NK_TARGET_SMEBI32) || ...
|
|
444
450
|
|
|
445
451
|
#if !defined(NK_TARGET_SMEHALF) || (NK_TARGET_SMEHALF && !NK_TARGET_ARM64_)
|
|
446
|
-
#if defined(__ARM_FEATURE_SME_F16F16) || (
|
|
452
|
+
#if defined(__ARM_FEATURE_SME_F16F16) || nk_has_builtin_(__builtin_sme_svmopa_za32_f16_m)
|
|
447
453
|
#define NK_TARGET_SMEHALF 1
|
|
448
454
|
#else
|
|
449
455
|
#undef NK_TARGET_SMEHALF
|
|
450
456
|
#define NK_TARGET_SMEHALF 0
|
|
451
|
-
#endif //
|
|
457
|
+
#endif // nk_has_builtin_(__builtin_sme_svmopa_za32_f16_m)
|
|
452
458
|
#endif // !defined(NK_TARGET_SMEHALF) || ...
|
|
453
459
|
|
|
454
460
|
#if !defined(NK_TARGET_SMEBF16) || (NK_TARGET_SMEBF16 && !NK_TARGET_ARM64_)
|
|
455
|
-
#if
|
|
461
|
+
#if nk_has_builtin_(__builtin_sme_svmopa_za32_bf16_m)
|
|
456
462
|
#define NK_TARGET_SMEBF16 1
|
|
457
463
|
#else
|
|
458
464
|
#undef NK_TARGET_SMEBF16
|
|
459
465
|
#define NK_TARGET_SMEBF16 0
|
|
460
|
-
#endif //
|
|
466
|
+
#endif // nk_has_builtin_(__builtin_sme_svmopa_za32_bf16_m)
|
|
461
467
|
#endif // !defined(NK_TARGET_SMEBF16) || ...
|
|
462
468
|
|
|
463
469
|
#if !defined(NK_TARGET_SMELUT2) || (NK_TARGET_SMELUT2 && !NK_TARGET_ARM64_)
|
|
464
|
-
#if
|
|
470
|
+
#if nk_has_builtin_(__builtin_sme_svluti2_lane_zt_u8)
|
|
465
471
|
#define NK_TARGET_SMELUT2 1
|
|
466
472
|
#else
|
|
467
473
|
#undef NK_TARGET_SMELUT2
|
|
468
474
|
#define NK_TARGET_SMELUT2 0
|
|
469
|
-
#endif //
|
|
475
|
+
#endif // nk_has_builtin_(__builtin_sme_svluti2_lane_zt_u8)
|
|
470
476
|
#endif // !defined(NK_TARGET_SMELUT2) || ...
|
|
471
477
|
|
|
472
478
|
// Compiling for Arm: NK_TARGET_SMEFA64 (FEAT_SME_FA64, full SVE2 in streaming mode)
|
package/numkong.gypi
CHANGED
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "numkong",
|
|
3
|
-
"version": "7.4.3",
|
|
3
|
+
"version": "7.4.5",
|
|
4
4
|
"description": "Portable mixed-precision math, linear-algebra, & retrieval library with 2000+ SIMD kernels for x86, Arm, RISC-V, LoongArch, Power, & WebAssembly",
|
|
5
5
|
"homepage": "https://github.com/ashvardanian/NumKong",
|
|
6
6
|
"author": "Ash Vardanian",
|
|
@@ -98,11 +98,11 @@
|
|
|
98
98
|
"printWidth": 120
|
|
99
99
|
},
|
|
100
100
|
"optionalDependencies": {
|
|
101
|
-
"@numkong/darwin-arm64": "7.4.3",
|
|
102
|
-
"@numkong/darwin-x64": "7.4.3",
|
|
103
|
-
"@numkong/linux-arm64": "7.4.3",
|
|
104
|
-
"@numkong/linux-x64": "7.4.3",
|
|
105
|
-
"@numkong/win32-arm64": "7.4.3",
|
|
106
|
-
"@numkong/win32-x64": "7.4.3"
|
|
101
|
+
"@numkong/darwin-arm64": "7.4.5",
|
|
102
|
+
"@numkong/darwin-x64": "7.4.5",
|
|
103
|
+
"@numkong/linux-arm64": "7.4.5",
|
|
104
|
+
"@numkong/linux-x64": "7.4.5",
|
|
105
|
+
"@numkong/win32-arm64": "7.4.5",
|
|
106
|
+
"@numkong/win32-x64": "7.4.5"
|
|
107
107
|
}
|
|
108
108
|
}
|
package/wasm/numkong.wasm
CHANGED
|
Binary file
|