whisper.rn 0.5.4 → 0.5.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/android/src/main/java/com/rnwhisper/WhisperContext.java +5 -0
- package/android/src/main/jni.cpp +13 -0
- package/cpp/ggml-alloc.c +78 -26
- package/cpp/ggml-alloc.h +9 -0
- package/cpp/ggml-backend-impl.h +1 -1
- package/cpp/ggml-backend-reg.cpp +19 -3
- package/cpp/ggml-backend.cpp +72 -20
- package/cpp/ggml-backend.h +2 -1
- package/cpp/ggml-cpu/arch/arm/cpu-feats.cpp +4 -0
- package/cpp/ggml-cpu/arch/arm/repack.cpp +1004 -0
- package/cpp/ggml-cpu/arch/x86/repack.cpp +6 -6
- package/cpp/ggml-cpu/arch-fallback.h +50 -2
- package/cpp/ggml-cpu/ggml-cpu-impl.h +1 -1
- package/cpp/ggml-cpu/ggml-cpu.c +139 -58
- package/cpp/ggml-cpu/ggml-cpu.cpp +4 -0
- package/cpp/ggml-cpu/ops.cpp +170 -18
- package/cpp/ggml-cpu/ops.h +1 -0
- package/cpp/ggml-cpu/repack.cpp +531 -5
- package/cpp/ggml-cpu/repack.h +14 -0
- package/cpp/ggml-cpu/simd-mappings.h +16 -18
- package/cpp/ggml-cpu/vec.cpp +41 -1
- package/cpp/ggml-cpu/vec.h +241 -138
- package/cpp/ggml-cpu.h +1 -0
- package/cpp/ggml-impl.h +0 -4
- package/cpp/ggml-metal/ggml-metal-context.m +26 -16
- package/cpp/ggml-metal/ggml-metal-device.cpp +452 -371
- package/cpp/ggml-metal/ggml-metal-device.h +87 -65
- package/cpp/ggml-metal/ggml-metal-device.m +263 -104
- package/cpp/ggml-metal/ggml-metal-impl.h +58 -4
- package/cpp/ggml-metal/ggml-metal-ops.cpp +415 -98
- package/cpp/ggml-metal/ggml-metal-ops.h +4 -0
- package/cpp/ggml-metal/ggml-metal.cpp +6 -5
- package/cpp/ggml-metal/ggml-metal.metal +404 -34
- package/cpp/ggml.c +110 -31
- package/cpp/ggml.h +51 -12
- package/cpp/jsi/RNWhisperJSI.cpp +1 -0
- package/cpp/whisper.cpp +16 -3
- package/ios/CMakeLists.txt +21 -1
- package/ios/RNWhisperContext.mm +5 -0
- package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml-alloc.h +9 -0
- package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml-backend-impl.h +1 -1
- package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml-backend.h +2 -1
- package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml-cpu.h +1 -0
- package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml-impl.h +0 -4
- package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml.h +51 -12
- package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Info.plist +0 -0
- package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/ggml-metal.metal +404 -34
- package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/rnwhisper +0 -0
- package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-alloc.h +9 -0
- package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-backend-impl.h +1 -1
- package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-backend.h +2 -1
- package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-cpu.h +1 -0
- package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-impl.h +0 -4
- package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml.h +51 -12
- package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Info.plist +0 -0
- package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/_CodeSignature/CodeResources +1 -1
- package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/ggml-metal.metal +404 -34
- package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/rnwhisper +0 -0
- package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml-alloc.h +9 -0
- package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml-backend-impl.h +1 -1
- package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml-backend.h +2 -1
- package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml-cpu.h +1 -0
- package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml-impl.h +0 -4
- package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml.h +51 -12
- package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Info.plist +0 -0
- package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/ggml-metal.metal +404 -34
- package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/rnwhisper +0 -0
- package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-alloc.h +9 -0
- package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-backend-impl.h +1 -1
- package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-backend.h +2 -1
- package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-cpu.h +1 -0
- package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-impl.h +0 -4
- package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml.h +51 -12
- package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Info.plist +0 -0
- package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/_CodeSignature/CodeResources +1 -1
- package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/ggml-metal.metal +404 -34
- package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/rnwhisper +0 -0
- package/lib/commonjs/NativeRNWhisper.js.map +1 -1
- package/lib/commonjs/jest-mock.js +2 -0
- package/lib/commonjs/jest-mock.js.map +1 -1
- package/lib/commonjs/version.json +1 -1
- package/lib/module/NativeRNWhisper.js.map +1 -1
- package/lib/module/jest-mock.js +2 -0
- package/lib/module/jest-mock.js.map +1 -1
- package/lib/module/version.json +1 -1
- package/lib/typescript/NativeRNWhisper.d.ts +1 -0
- package/lib/typescript/NativeRNWhisper.d.ts.map +1 -1
- package/package.json +1 -1
- package/src/NativeRNWhisper.ts +1 -0
- package/src/jest-mock.ts +2 -0
- package/src/version.json +1 -1
|
@@ -646,7 +646,7 @@ static void gemm_q4_b32_8x8_q8_0_lut_avx(int n, float * WSP_GGML_RESTRICT s, siz
|
|
|
646
646
|
__m256i requiredOrder = _mm256_set_epi32(3, 2, 1, 0, 7, 6, 5, 4);
|
|
647
647
|
int64_t xstart = 0;
|
|
648
648
|
int anr = nr - nr%16; // Used to align nr with boundary of 16
|
|
649
|
-
#
|
|
649
|
+
#if defined(__AVX512BW__) && defined(__AVX512DQ__)
|
|
650
650
|
int anc = nc - nc%16; // Used to align nc with boundary of 16
|
|
651
651
|
// Mask to mask out nibbles from packed bytes expanded to 512 bit length
|
|
652
652
|
const __m512i m4bexpanded = _mm512_set1_epi8(0x0F);
|
|
@@ -1041,7 +1041,7 @@ static void gemm_q4_b32_8x8_q8_0_lut_avx(int n, float * WSP_GGML_RESTRICT s, siz
|
|
|
1041
1041
|
xstart = anc/8;
|
|
1042
1042
|
y = 0;
|
|
1043
1043
|
}
|
|
1044
|
-
#endif //
|
|
1044
|
+
#endif // __AVX512BW__ && __AVX512DQ__
|
|
1045
1045
|
|
|
1046
1046
|
// Take group of four block_q8_0x4 structures at each pass of the loop and perform dot product operation
|
|
1047
1047
|
|
|
@@ -1989,7 +1989,7 @@ void wsp_ggml_gemm_q4_K_8x8_q8_K(int n, float * WSP_GGML_RESTRICT s, size_t bs,
|
|
|
1989
1989
|
__m256i requiredOrder = _mm256_set_epi32(3, 2, 1, 0, 7, 6, 5, 4);
|
|
1990
1990
|
int64_t xstart = 0;
|
|
1991
1991
|
int anr = nr - nr % 16;; // Used to align nr with boundary of 16
|
|
1992
|
-
#
|
|
1992
|
+
#if defined(__AVX512BW__) && defined(__AVX512DQ__)
|
|
1993
1993
|
int anc = nc - nc % 16; // Used to align nc with boundary of 16
|
|
1994
1994
|
// Mask to mask out nibbles from packed bytes expanded to 512 bit length
|
|
1995
1995
|
const __m512i m4bexpanded = _mm512_set1_epi8(0x0F);
|
|
@@ -2727,7 +2727,7 @@ void wsp_ggml_gemm_q4_K_8x8_q8_K(int n, float * WSP_GGML_RESTRICT s, size_t bs,
|
|
|
2727
2727
|
xstart = anc/8;
|
|
2728
2728
|
y = 0;
|
|
2729
2729
|
}
|
|
2730
|
-
#endif //
|
|
2730
|
+
#endif // __AVX512BW__ && __AVX512DQ__
|
|
2731
2731
|
|
|
2732
2732
|
// Take group of four block_q8_Kx4 structures at each pass of the loop and perform dot product operation
|
|
2733
2733
|
for (; y < anr / 4; y += 4) {
|
|
@@ -3467,7 +3467,7 @@ void wsp_ggml_gemm_q2_K_8x8_q8_K(int n, float * WSP_GGML_RESTRICT s, size_t bs,
|
|
|
3467
3467
|
__m256i scalesmask2 = _mm256_castsi128_si256(scalesmask2_sse);
|
|
3468
3468
|
scalesmask2 = _mm256_permute2f128_si256(scalesmask2, scalesmask2, 0);
|
|
3469
3469
|
|
|
3470
|
-
#
|
|
3470
|
+
#if defined(__AVX512BW__) && defined(__AVX512DQ__)
|
|
3471
3471
|
|
|
3472
3472
|
int anc = nc - nc % 16; // Used to align nc with boundary of 16
|
|
3473
3473
|
|
|
@@ -4947,7 +4947,7 @@ void wsp_ggml_gemm_q2_K_8x8_q8_K(int n, float * WSP_GGML_RESTRICT s, size_t bs,
|
|
|
4947
4947
|
y = 0;
|
|
4948
4948
|
}
|
|
4949
4949
|
|
|
4950
|
-
#endif //
|
|
4950
|
+
#endif // __AVX512BW__ && __AVX512DQ__
|
|
4951
4951
|
|
|
4952
4952
|
// Take group of four block_q8_Kx4 structures at each pass of the loop and perform dot product operation
|
|
4953
4953
|
for (; y < anr / 4; y += 4) {
|
|
@@ -33,39 +33,52 @@
|
|
|
33
33
|
// repack.cpp
|
|
34
34
|
#define wsp_ggml_wsp_quantize_mat_q8_0_4x4_generic wsp_ggml_wsp_quantize_mat_q8_0_4x4
|
|
35
35
|
#define wsp_ggml_wsp_quantize_mat_q8_0_4x8_generic wsp_ggml_wsp_quantize_mat_q8_0_4x8
|
|
36
|
+
#define wsp_ggml_wsp_quantize_mat_q8_K_4x4_generic wsp_ggml_wsp_quantize_mat_q8_K_4x4
|
|
36
37
|
#define wsp_ggml_wsp_quantize_mat_q8_K_4x8_generic wsp_ggml_wsp_quantize_mat_q8_K_4x8
|
|
37
38
|
#define wsp_ggml_gemv_q4_0_4x4_q8_0_generic wsp_ggml_gemv_q4_0_4x4_q8_0
|
|
38
39
|
#define wsp_ggml_gemv_q4_0_4x8_q8_0_generic wsp_ggml_gemv_q4_0_4x8_q8_0
|
|
39
40
|
#define wsp_ggml_gemv_q4_0_8x8_q8_0_generic wsp_ggml_gemv_q4_0_8x8_q8_0
|
|
41
|
+
#define wsp_ggml_gemv_q4_K_8x4_q8_K_generic wsp_ggml_gemv_q4_K_8x4_q8_K
|
|
40
42
|
#define wsp_ggml_gemv_q4_K_8x8_q8_K_generic wsp_ggml_gemv_q4_K_8x8_q8_K
|
|
41
43
|
#define wsp_ggml_gemv_q2_K_8x8_q8_K_generic wsp_ggml_gemv_q2_K_8x8_q8_K
|
|
42
44
|
#define wsp_ggml_gemv_iq4_nl_4x4_q8_0_generic wsp_ggml_gemv_iq4_nl_4x4_q8_0
|
|
43
45
|
#define wsp_ggml_gemv_iq4_nl_8x8_q8_0_generic wsp_ggml_gemv_iq4_nl_8x8_q8_0
|
|
46
|
+
#define wsp_ggml_gemv_q8_0_4x4_q8_0_generic wsp_ggml_gemv_q8_0_4x4_q8_0
|
|
47
|
+
#define wsp_ggml_gemv_q8_0_4x8_q8_0_generic wsp_ggml_gemv_q8_0_4x8_q8_0
|
|
44
48
|
#define wsp_ggml_gemm_q4_0_4x4_q8_0_generic wsp_ggml_gemm_q4_0_4x4_q8_0
|
|
45
49
|
#define wsp_ggml_gemm_q4_0_4x8_q8_0_generic wsp_ggml_gemm_q4_0_4x8_q8_0
|
|
46
50
|
#define wsp_ggml_gemm_q4_0_8x8_q8_0_generic wsp_ggml_gemm_q4_0_8x8_q8_0
|
|
51
|
+
#define wsp_ggml_gemm_q4_K_8x4_q8_K_generic wsp_ggml_gemm_q4_K_8x4_q8_K
|
|
47
52
|
#define wsp_ggml_gemm_q4_K_8x8_q8_K_generic wsp_ggml_gemm_q4_K_8x8_q8_K
|
|
48
53
|
#define wsp_ggml_gemm_q2_K_8x8_q8_K_generic wsp_ggml_gemm_q2_K_8x8_q8_K
|
|
49
54
|
#define wsp_ggml_gemm_iq4_nl_4x4_q8_0_generic wsp_ggml_gemm_iq4_nl_4x4_q8_0
|
|
50
55
|
#define wsp_ggml_gemm_iq4_nl_8x8_q8_0_generic wsp_ggml_gemm_iq4_nl_8x8_q8_0
|
|
56
|
+
#define wsp_ggml_gemm_q8_0_4x4_q8_0_generic wsp_ggml_gemm_q8_0_4x4_q8_0
|
|
57
|
+
#define wsp_ggml_gemm_q8_0_4x8_q8_0_generic wsp_ggml_gemm_q8_0_4x8_q8_0
|
|
51
58
|
#elif defined(__aarch64__) || defined(__arm__) || defined(_M_ARM) || defined(_M_ARM64)
|
|
52
59
|
// repack.cpp
|
|
60
|
+
#define wsp_ggml_wsp_quantize_mat_q8_K_4x4_generic wsp_ggml_wsp_quantize_mat_q8_K_4x4
|
|
53
61
|
#define wsp_ggml_wsp_quantize_mat_q8_K_4x8_generic wsp_ggml_wsp_quantize_mat_q8_K_4x8
|
|
54
|
-
#define wsp_ggml_gemv_q4_K_8x8_q8_K_generic wsp_ggml_gemv_q4_K_8x8_q8_K
|
|
55
62
|
#define wsp_ggml_gemv_iq4_nl_8x8_q8_0_generic wsp_ggml_gemv_iq4_nl_8x8_q8_0
|
|
56
63
|
#define wsp_ggml_gemv_q2_K_8x8_q8_K_generic wsp_ggml_gemv_q2_K_8x8_q8_K
|
|
57
|
-
#define wsp_ggml_gemm_q4_K_8x8_q8_K_generic wsp_ggml_gemm_q4_K_8x8_q8_K
|
|
58
64
|
#define wsp_ggml_gemm_iq4_nl_8x8_q8_0_generic wsp_ggml_gemm_iq4_nl_8x8_q8_0
|
|
59
65
|
#define wsp_ggml_gemm_q2_K_8x8_q8_K_generic wsp_ggml_gemm_q2_K_8x8_q8_K
|
|
60
66
|
#elif defined(__x86_64__) || defined(__i386__) || defined(_M_IX86) || defined(_M_X64)
|
|
61
67
|
// repack.cpp
|
|
62
68
|
#define wsp_ggml_wsp_quantize_mat_q8_0_4x4_generic wsp_ggml_wsp_quantize_mat_q8_0_4x4
|
|
69
|
+
#define wsp_ggml_wsp_quantize_mat_q8_K_4x4_generic wsp_ggml_wsp_quantize_mat_q8_K_4x4
|
|
63
70
|
#define wsp_ggml_gemv_q4_0_4x4_q8_0_generic wsp_ggml_gemv_q4_0_4x4_q8_0
|
|
64
71
|
#define wsp_ggml_gemv_q4_0_4x8_q8_0_generic wsp_ggml_gemv_q4_0_4x8_q8_0
|
|
72
|
+
#define wsp_ggml_gemv_q4_K_8x4_q8_K_generic wsp_ggml_gemv_q4_K_8x4_q8_K
|
|
65
73
|
#define wsp_ggml_gemv_iq4_nl_4x4_q8_0_generic wsp_ggml_gemv_iq4_nl_4x4_q8_0
|
|
74
|
+
#define wsp_ggml_gemv_q8_0_4x4_q8_0_generic wsp_ggml_gemv_q8_0_4x4_q8_0
|
|
75
|
+
#define wsp_ggml_gemv_q8_0_4x8_q8_0_generic wsp_ggml_gemv_q8_0_4x8_q8_0
|
|
66
76
|
#define wsp_ggml_gemm_q4_0_4x4_q8_0_generic wsp_ggml_gemm_q4_0_4x4_q8_0
|
|
67
77
|
#define wsp_ggml_gemm_q4_0_4x8_q8_0_generic wsp_ggml_gemm_q4_0_4x8_q8_0
|
|
78
|
+
#define wsp_ggml_gemm_q4_K_8x4_q8_K_generic wsp_ggml_gemm_q4_K_8x4_q8_K
|
|
68
79
|
#define wsp_ggml_gemm_iq4_nl_4x4_q8_0_generic wsp_ggml_gemm_iq4_nl_4x4_q8_0
|
|
80
|
+
#define wsp_ggml_gemm_q8_0_4x4_q8_0_generic wsp_ggml_gemm_q8_0_4x4_q8_0
|
|
81
|
+
#define wsp_ggml_gemm_q8_0_4x8_q8_0_generic wsp_ggml_gemm_q8_0_4x8_q8_0
|
|
69
82
|
#elif defined(__POWERPC__) || defined(__powerpc__)
|
|
70
83
|
// ref: https://github.com/ggml-org/llama.cpp/pull/14146#issuecomment-2972561679
|
|
71
84
|
// quants.c
|
|
@@ -76,21 +89,28 @@
|
|
|
76
89
|
// repack.cpp
|
|
77
90
|
#define wsp_ggml_wsp_quantize_mat_q8_0_4x4_generic wsp_ggml_wsp_quantize_mat_q8_0_4x4
|
|
78
91
|
#define wsp_ggml_wsp_quantize_mat_q8_0_4x8_generic wsp_ggml_wsp_quantize_mat_q8_0_4x8
|
|
92
|
+
#define wsp_ggml_wsp_quantize_mat_q8_K_4x4_generic wsp_ggml_wsp_quantize_mat_q8_K_4x4
|
|
79
93
|
#define wsp_ggml_wsp_quantize_mat_q8_K_4x8_generic wsp_ggml_wsp_quantize_mat_q8_K_4x8
|
|
80
94
|
#define wsp_ggml_gemv_q4_0_4x4_q8_0_generic wsp_ggml_gemv_q4_0_4x4_q8_0
|
|
81
95
|
#define wsp_ggml_gemv_q4_0_4x8_q8_0_generic wsp_ggml_gemv_q4_0_4x8_q8_0
|
|
82
96
|
#define wsp_ggml_gemv_q4_0_8x8_q8_0_generic wsp_ggml_gemv_q4_0_8x8_q8_0
|
|
97
|
+
#define wsp_ggml_gemv_q4_K_8x4_q8_K_generic wsp_ggml_gemv_q4_K_8x4_q8_K
|
|
83
98
|
#define wsp_ggml_gemv_q4_K_8x8_q8_K_generic wsp_ggml_gemv_q4_K_8x8_q8_K
|
|
84
99
|
#define wsp_ggml_gemv_q2_K_8x8_q8_K_generic wsp_ggml_gemv_q2_K_8x8_q8_K
|
|
85
100
|
#define wsp_ggml_gemv_iq4_nl_4x4_q8_0_generic wsp_ggml_gemv_iq4_nl_4x4_q8_0
|
|
86
101
|
#define wsp_ggml_gemv_iq4_nl_8x8_q8_0_generic wsp_ggml_gemv_iq4_nl_8x8_q8_0
|
|
102
|
+
#define wsp_ggml_gemv_q8_0_4x4_q8_0_generic wsp_ggml_gemv_q8_0_4x4_q8_0
|
|
103
|
+
#define wsp_ggml_gemv_q8_0_4x8_q8_0_generic wsp_ggml_gemv_q8_0_4x8_q8_0
|
|
87
104
|
#define wsp_ggml_gemm_q4_0_4x4_q8_0_generic wsp_ggml_gemm_q4_0_4x4_q8_0
|
|
88
105
|
#define wsp_ggml_gemm_q4_0_4x8_q8_0_generic wsp_ggml_gemm_q4_0_4x8_q8_0
|
|
89
106
|
#define wsp_ggml_gemm_q4_0_8x8_q8_0_generic wsp_ggml_gemm_q4_0_8x8_q8_0
|
|
107
|
+
#define wsp_ggml_gemm_q4_K_8x4_q8_K_generic wsp_ggml_gemm_q4_K_8x4_q8_K
|
|
90
108
|
#define wsp_ggml_gemm_q4_K_8x8_q8_K_generic wsp_ggml_gemm_q4_K_8x8_q8_K
|
|
91
109
|
#define wsp_ggml_gemm_q2_K_8x8_q8_K_generic wsp_ggml_gemm_q2_K_8x8_q8_K
|
|
92
110
|
#define wsp_ggml_gemm_iq4_nl_4x4_q8_0_generic wsp_ggml_gemm_iq4_nl_4x4_q8_0
|
|
93
111
|
#define wsp_ggml_gemm_iq4_nl_8x8_q8_0_generic wsp_ggml_gemm_iq4_nl_8x8_q8_0
|
|
112
|
+
#define wsp_ggml_gemm_q8_0_4x4_q8_0_generic wsp_ggml_gemm_q8_0_4x4_q8_0
|
|
113
|
+
#define wsp_ggml_gemm_q8_0_4x8_q8_0_generic wsp_ggml_gemm_q8_0_4x8_q8_0
|
|
94
114
|
#elif defined(__loongarch64)
|
|
95
115
|
// quants.c
|
|
96
116
|
#define wsp_quantize_row_q8_K_generic wsp_quantize_row_q8_K
|
|
@@ -101,21 +121,28 @@
|
|
|
101
121
|
// repack.cpp
|
|
102
122
|
#define wsp_ggml_wsp_quantize_mat_q8_0_4x4_generic wsp_ggml_wsp_quantize_mat_q8_0_4x4
|
|
103
123
|
#define wsp_ggml_wsp_quantize_mat_q8_0_4x8_generic wsp_ggml_wsp_quantize_mat_q8_0_4x8
|
|
124
|
+
#define wsp_ggml_wsp_quantize_mat_q8_K_4x4_generic wsp_ggml_wsp_quantize_mat_q8_K_4x4
|
|
104
125
|
#define wsp_ggml_wsp_quantize_mat_q8_K_4x8_generic wsp_ggml_wsp_quantize_mat_q8_K_4x8
|
|
105
126
|
#define wsp_ggml_gemv_q4_0_4x4_q8_0_generic wsp_ggml_gemv_q4_0_4x4_q8_0
|
|
106
127
|
#define wsp_ggml_gemv_q4_0_4x8_q8_0_generic wsp_ggml_gemv_q4_0_4x8_q8_0
|
|
107
128
|
#define wsp_ggml_gemv_q4_0_8x8_q8_0_generic wsp_ggml_gemv_q4_0_8x8_q8_0
|
|
129
|
+
#define wsp_ggml_gemv_q4_K_8x4_q8_K_generic wsp_ggml_gemv_q4_K_8x4_q8_K
|
|
108
130
|
#define wsp_ggml_gemv_q4_K_8x8_q8_K_generic wsp_ggml_gemv_q4_K_8x8_q8_K
|
|
109
131
|
#define wsp_ggml_gemv_q2_K_8x8_q8_K_generic wsp_ggml_gemv_q2_K_8x8_q8_K
|
|
110
132
|
#define wsp_ggml_gemv_iq4_nl_4x4_q8_0_generic wsp_ggml_gemv_iq4_nl_4x4_q8_0
|
|
111
133
|
#define wsp_ggml_gemv_iq4_nl_8x8_q8_0_generic wsp_ggml_gemv_iq4_nl_8x8_q8_0
|
|
134
|
+
#define wsp_ggml_gemv_q8_0_4x4_q8_0_generic wsp_ggml_gemv_q8_0_4x4_q8_0
|
|
135
|
+
#define wsp_ggml_gemv_q8_0_4x8_q8_0_generic wsp_ggml_gemv_q8_0_4x8_q8_0
|
|
112
136
|
#define wsp_ggml_gemm_q4_0_4x4_q8_0_generic wsp_ggml_gemm_q4_0_4x4_q8_0
|
|
113
137
|
#define wsp_ggml_gemm_q4_0_4x8_q8_0_generic wsp_ggml_gemm_q4_0_4x8_q8_0
|
|
114
138
|
#define wsp_ggml_gemm_q4_0_8x8_q8_0_generic wsp_ggml_gemm_q4_0_8x8_q8_0
|
|
139
|
+
#define wsp_ggml_gemm_q4_K_8x4_q8_K_generic wsp_ggml_gemm_q4_K_8x4_q8_K
|
|
115
140
|
#define wsp_ggml_gemm_q4_K_8x8_q8_K_generic wsp_ggml_gemm_q4_K_8x8_q8_K
|
|
116
141
|
#define wsp_ggml_gemm_q2_K_8x8_q8_K_generic wsp_ggml_gemm_q2_K_8x8_q8_K
|
|
117
142
|
#define wsp_ggml_gemm_iq4_nl_4x4_q8_0_generic wsp_ggml_gemm_iq4_nl_4x4_q8_0
|
|
118
143
|
#define wsp_ggml_gemm_iq4_nl_8x8_q8_0_generic wsp_ggml_gemm_iq4_nl_8x8_q8_0
|
|
144
|
+
#define wsp_ggml_gemm_q8_0_4x4_q8_0_generic wsp_ggml_gemm_q8_0_4x4_q8_0
|
|
145
|
+
#define wsp_ggml_gemm_q8_0_4x8_q8_0_generic wsp_ggml_gemm_q8_0_4x8_q8_0
|
|
119
146
|
#elif defined(__riscv)
|
|
120
147
|
// quants.c
|
|
121
148
|
#define wsp_quantize_row_q8_K_generic wsp_quantize_row_q8_K
|
|
@@ -134,19 +161,26 @@
|
|
|
134
161
|
// repack.cpp
|
|
135
162
|
#define wsp_ggml_wsp_quantize_mat_q8_0_4x4_generic wsp_ggml_wsp_quantize_mat_q8_0_4x4
|
|
136
163
|
#define wsp_ggml_wsp_quantize_mat_q8_0_4x8_generic wsp_ggml_wsp_quantize_mat_q8_0_4x8
|
|
164
|
+
#define wsp_ggml_wsp_quantize_mat_q8_K_4x4_generic wsp_ggml_wsp_quantize_mat_q8_K_4x4
|
|
137
165
|
#define wsp_ggml_wsp_quantize_mat_q8_K_4x8_generic wsp_ggml_wsp_quantize_mat_q8_K_4x8
|
|
138
166
|
#define wsp_ggml_gemv_q4_0_4x4_q8_0_generic wsp_ggml_gemv_q4_0_4x4_q8_0
|
|
139
167
|
#define wsp_ggml_gemv_q4_0_4x8_q8_0_generic wsp_ggml_gemv_q4_0_4x8_q8_0
|
|
168
|
+
#define wsp_ggml_gemv_q4_K_8x4_q8_K_generic wsp_ggml_gemv_q4_K_8x4_q8_K
|
|
140
169
|
#define wsp_ggml_gemv_q4_K_8x8_q8_K_generic wsp_ggml_gemv_q4_K_8x8_q8_K
|
|
141
170
|
#define wsp_ggml_gemv_q2_K_8x8_q8_K_generic wsp_ggml_gemv_q2_K_8x8_q8_K
|
|
142
171
|
#define wsp_ggml_gemv_iq4_nl_4x4_q8_0_generic wsp_ggml_gemv_iq4_nl_4x4_q8_0
|
|
143
172
|
#define wsp_ggml_gemv_iq4_nl_8x8_q8_0_generic wsp_ggml_gemv_iq4_nl_8x8_q8_0
|
|
173
|
+
#define wsp_ggml_gemv_q8_0_4x4_q8_0_generic wsp_ggml_gemv_q8_0_4x4_q8_0
|
|
174
|
+
#define wsp_ggml_gemv_q8_0_4x8_q8_0_generic wsp_ggml_gemv_q8_0_4x8_q8_0
|
|
144
175
|
#define wsp_ggml_gemm_q4_0_4x4_q8_0_generic wsp_ggml_gemm_q4_0_4x4_q8_0
|
|
145
176
|
#define wsp_ggml_gemm_q4_0_4x8_q8_0_generic wsp_ggml_gemm_q4_0_4x8_q8_0
|
|
177
|
+
#define wsp_ggml_gemm_q4_K_8x4_q8_K_generic wsp_ggml_gemm_q4_K_8x4_q8_K
|
|
146
178
|
#define wsp_ggml_gemm_q4_K_8x8_q8_K_generic wsp_ggml_gemm_q4_K_8x8_q8_K
|
|
147
179
|
#define wsp_ggml_gemm_q2_K_8x8_q8_K_generic wsp_ggml_gemm_q2_K_8x8_q8_K
|
|
148
180
|
#define wsp_ggml_gemm_iq4_nl_4x4_q8_0_generic wsp_ggml_gemm_iq4_nl_4x4_q8_0
|
|
149
181
|
#define wsp_ggml_gemm_iq4_nl_8x8_q8_0_generic wsp_ggml_gemm_iq4_nl_8x8_q8_0
|
|
182
|
+
#define wsp_ggml_gemm_q8_0_4x4_q8_0_generic wsp_ggml_gemm_q8_0_4x4_q8_0
|
|
183
|
+
#define wsp_ggml_gemm_q8_0_4x8_q8_0_generic wsp_ggml_gemm_q8_0_4x8_q8_0
|
|
150
184
|
#elif defined(__s390x__)
|
|
151
185
|
// quants.c
|
|
152
186
|
#define wsp_quantize_row_q8_K_generic wsp_quantize_row_q8_K
|
|
@@ -163,21 +197,28 @@
|
|
|
163
197
|
// repack.cpp
|
|
164
198
|
#define wsp_ggml_wsp_quantize_mat_q8_0_4x4_generic wsp_ggml_wsp_quantize_mat_q8_0_4x4
|
|
165
199
|
#define wsp_ggml_wsp_quantize_mat_q8_0_4x8_generic wsp_ggml_wsp_quantize_mat_q8_0_4x8
|
|
200
|
+
#define wsp_ggml_wsp_quantize_mat_q8_K_4x4_generic wsp_ggml_wsp_quantize_mat_q8_K_4x4
|
|
166
201
|
#define wsp_ggml_wsp_quantize_mat_q8_K_4x8_generic wsp_ggml_wsp_quantize_mat_q8_K_4x8
|
|
167
202
|
#define wsp_ggml_gemv_q4_0_4x4_q8_0_generic wsp_ggml_gemv_q4_0_4x4_q8_0
|
|
168
203
|
#define wsp_ggml_gemv_q4_0_4x8_q8_0_generic wsp_ggml_gemv_q4_0_4x8_q8_0
|
|
169
204
|
#define wsp_ggml_gemv_q4_0_8x8_q8_0_generic wsp_ggml_gemv_q4_0_8x8_q8_0
|
|
205
|
+
#define wsp_ggml_gemv_q4_K_8x4_q8_K_generic wsp_ggml_gemv_q4_K_8x4_q8_K
|
|
170
206
|
#define wsp_ggml_gemv_q4_K_8x8_q8_K_generic wsp_ggml_gemv_q4_K_8x8_q8_K
|
|
171
207
|
#define wsp_ggml_gemv_q2_K_8x8_q8_K_generic wsp_ggml_gemv_q2_K_8x8_q8_K
|
|
172
208
|
#define wsp_ggml_gemv_iq4_nl_4x4_q8_0_generic wsp_ggml_gemv_iq4_nl_4x4_q8_0
|
|
173
209
|
#define wsp_ggml_gemv_iq4_nl_8x8_q8_0_generic wsp_ggml_gemv_iq4_nl_8x8_q8_0
|
|
210
|
+
#define wsp_ggml_gemv_q8_0_4x4_q8_0_generic wsp_ggml_gemv_q8_0_4x4_q8_0
|
|
211
|
+
#define wsp_ggml_gemv_q8_0_4x8_q8_0_generic wsp_ggml_gemv_q8_0_4x8_q8_0
|
|
174
212
|
#define wsp_ggml_gemm_q4_0_4x4_q8_0_generic wsp_ggml_gemm_q4_0_4x4_q8_0
|
|
175
213
|
#define wsp_ggml_gemm_q4_0_4x8_q8_0_generic wsp_ggml_gemm_q4_0_4x8_q8_0
|
|
176
214
|
#define wsp_ggml_gemm_q4_0_8x8_q8_0_generic wsp_ggml_gemm_q4_0_8x8_q8_0
|
|
215
|
+
#define wsp_ggml_gemm_q4_K_8x4_q8_K_generic wsp_ggml_gemm_q4_K_8x4_q8_K
|
|
177
216
|
#define wsp_ggml_gemm_q4_K_8x8_q8_K_generic wsp_ggml_gemm_q4_K_8x8_q8_K
|
|
178
217
|
#define wsp_ggml_gemm_q2_K_8x8_q8_K_generic wsp_ggml_gemm_q2_K_8x8_q8_K
|
|
179
218
|
#define wsp_ggml_gemm_iq4_nl_4x4_q8_0_generic wsp_ggml_gemm_iq4_nl_4x4_q8_0
|
|
180
219
|
#define wsp_ggml_gemm_iq4_nl_8x8_q8_0_generic wsp_ggml_gemm_iq4_nl_8x8_q8_0
|
|
220
|
+
#define wsp_ggml_gemm_q8_0_4x4_q8_0_generic wsp_ggml_gemm_q8_0_4x4_q8_0
|
|
221
|
+
#define wsp_ggml_gemm_q8_0_4x8_q8_0_generic wsp_ggml_gemm_q8_0_4x8_q8_0
|
|
181
222
|
#elif defined(__wasm__)
|
|
182
223
|
// quants.c
|
|
183
224
|
#define wsp_ggml_vec_dot_q4_1_q8_1_generic wsp_ggml_vec_dot_q4_1_q8_1
|
|
@@ -196,19 +237,26 @@
|
|
|
196
237
|
// repack.cpp
|
|
197
238
|
#define wsp_ggml_wsp_quantize_mat_q8_0_4x4_generic wsp_ggml_wsp_quantize_mat_q8_0_4x4
|
|
198
239
|
#define wsp_ggml_wsp_quantize_mat_q8_0_4x8_generic wsp_ggml_wsp_quantize_mat_q8_0_4x8
|
|
240
|
+
#define wsp_ggml_wsp_quantize_mat_q8_K_4x4_generic wsp_ggml_wsp_quantize_mat_q8_K_4x4
|
|
199
241
|
#define wsp_ggml_wsp_quantize_mat_q8_K_4x8_generic wsp_ggml_wsp_quantize_mat_q8_K_4x8
|
|
200
242
|
#define wsp_ggml_gemv_q4_0_4x4_q8_0_generic wsp_ggml_gemv_q4_0_4x4_q8_0
|
|
201
243
|
#define wsp_ggml_gemv_q4_0_4x8_q8_0_generic wsp_ggml_gemv_q4_0_4x8_q8_0
|
|
202
244
|
#define wsp_ggml_gemv_q4_0_8x8_q8_0_generic wsp_ggml_gemv_q4_0_8x8_q8_0
|
|
245
|
+
#define wsp_ggml_gemv_q4_K_8x4_q8_K_generic wsp_ggml_gemv_q4_K_8x4_q8_K
|
|
203
246
|
#define wsp_ggml_gemv_q4_K_8x8_q8_K_generic wsp_ggml_gemv_q4_K_8x8_q8_K
|
|
204
247
|
#define wsp_ggml_gemv_q2_K_8x8_q8_K_generic wsp_ggml_gemv_q2_K_8x8_q8_K
|
|
205
248
|
#define wsp_ggml_gemv_iq4_nl_4x4_q8_0_generic wsp_ggml_gemv_iq4_nl_4x4_q8_0
|
|
206
249
|
#define wsp_ggml_gemv_iq4_nl_8x8_q8_0_generic wsp_ggml_gemv_iq4_nl_8x8_q8_0
|
|
250
|
+
#define wsp_ggml_gemv_q8_0_4x4_q8_0_generic wsp_ggml_gemv_q8_0_4x4_q8_0
|
|
251
|
+
#define wsp_ggml_gemv_q8_0_4x8_q8_0_generic wsp_ggml_gemv_q8_0_4x8_q8_0
|
|
207
252
|
#define wsp_ggml_gemm_q4_0_4x4_q8_0_generic wsp_ggml_gemm_q4_0_4x4_q8_0
|
|
208
253
|
#define wsp_ggml_gemm_q4_0_4x8_q8_0_generic wsp_ggml_gemm_q4_0_4x8_q8_0
|
|
209
254
|
#define wsp_ggml_gemm_q4_0_8x8_q8_0_generic wsp_ggml_gemm_q4_0_8x8_q8_0
|
|
255
|
+
#define wsp_ggml_gemm_q4_K_8x4_q8_K_generic wsp_ggml_gemm_q4_K_8x4_q8_K
|
|
210
256
|
#define wsp_ggml_gemm_q4_K_8x8_q8_K_generic wsp_ggml_gemm_q4_K_8x8_q8_K
|
|
211
257
|
#define wsp_ggml_gemm_q2_K_8x8_q8_K_generic wsp_ggml_gemm_q2_K_8x8_q8_K
|
|
212
258
|
#define wsp_ggml_gemm_iq4_nl_4x4_q8_0_generic wsp_ggml_gemm_iq4_nl_4x4_q8_0
|
|
213
259
|
#define wsp_ggml_gemm_iq4_nl_8x8_q8_0_generic wsp_ggml_gemm_iq4_nl_8x8_q8_0
|
|
260
|
+
#define wsp_ggml_gemm_q8_0_4x4_q8_0_generic wsp_ggml_gemm_q8_0_4x4_q8_0
|
|
261
|
+
#define wsp_ggml_gemm_q8_0_4x8_q8_0_generic wsp_ggml_gemm_q8_0_4x8_q8_0
|
|
214
262
|
#endif
|
|
@@ -328,7 +328,7 @@ inline static int32x4_t wsp_ggml_vdotq_s32(int32x4_t acc, int8x16_t a, int8x16_t
|
|
|
328
328
|
|
|
329
329
|
#if defined(_MSC_VER) || defined(__MINGW32__)
|
|
330
330
|
#include <intrin.h>
|
|
331
|
-
#elif defined(
|
|
331
|
+
#elif defined(__SSE__) || defined(__SSE3__) || defined(__SSSE3__) || defined(__AVX__) || defined(__F16C__) || defined(__AVX2__) || defined(__AVX512F__) || defined(__AVX512BF16__)
|
|
332
332
|
#include <immintrin.h>
|
|
333
333
|
#endif
|
|
334
334
|
|
package/cpp/ggml-cpu/ggml-cpu.c
CHANGED
|
@@ -81,6 +81,11 @@ struct wsp_ggml_arm_arch_features_type {
|
|
|
81
81
|
} wsp_ggml_arm_arch_features = { 0 };
|
|
82
82
|
#endif
|
|
83
83
|
|
|
84
|
+
#if defined(__riscv)
|
|
85
|
+
struct wsp_ggml_riscv_arch_features_type {
|
|
86
|
+
int rvv_vlen;
|
|
87
|
+
} wsp_ggml_riscv_arch_features = { 0 };
|
|
88
|
+
#endif
|
|
84
89
|
|
|
85
90
|
#if defined(_WIN32)
|
|
86
91
|
|
|
@@ -187,6 +192,9 @@ typedef void * thread_ret_t;
|
|
|
187
192
|
|
|
188
193
|
typedef pthread_t wsp_ggml_thread_t;
|
|
189
194
|
|
|
195
|
+
#define WSP_GGML_THREADPOOL_N_THREADS_MASK (0xffffU)
|
|
196
|
+
#define WSP_GGML_THREADPOOL_N_THREADS_BITS (16)
|
|
197
|
+
|
|
190
198
|
#if defined(__APPLE__)
|
|
191
199
|
#include <unistd.h>
|
|
192
200
|
#include <mach/mach.h>
|
|
@@ -449,7 +457,7 @@ struct wsp_ggml_threadpool {
|
|
|
449
457
|
struct wsp_ggml_cplan * cplan;
|
|
450
458
|
|
|
451
459
|
// synchronization primitives
|
|
452
|
-
atomic_int n_graph; //
|
|
460
|
+
atomic_int n_graph; // updated when there is work to be done (i.e each graph) holds graph and active thread counts.
|
|
453
461
|
atomic_int WSP_GGML_CACHE_ALIGN n_barrier;
|
|
454
462
|
atomic_int WSP_GGML_CACHE_ALIGN n_barrier_passed;
|
|
455
463
|
atomic_int WSP_GGML_CACHE_ALIGN current_chunk; // currently processing chunk during Mat_Mul, shared between all the threads.
|
|
@@ -457,12 +465,10 @@ struct wsp_ggml_threadpool {
|
|
|
457
465
|
// these are atomic as an annotation for thread-sanitizer
|
|
458
466
|
atomic_bool stop; // Used for stopping the threadpool altogether
|
|
459
467
|
atomic_bool pause; // Used for pausing the threadpool or individual threads
|
|
460
|
-
atomic_int
|
|
468
|
+
atomic_int abort; // Used for aborting processing of a graph
|
|
461
469
|
|
|
462
470
|
struct wsp_ggml_compute_state * workers; // per thread state
|
|
463
|
-
int
|
|
464
|
-
atomic_int n_threads_cur; // number of threads used in the current graph
|
|
465
|
-
|
|
471
|
+
int n_threads; // Number of threads in the pool
|
|
466
472
|
int32_t prio; // Scheduling priority
|
|
467
473
|
uint32_t poll; // Polling level (0 - no polling)
|
|
468
474
|
|
|
@@ -490,6 +496,15 @@ static inline void wsp_ggml_thread_cpu_relax(void) {
|
|
|
490
496
|
static inline void wsp_ggml_thread_cpu_relax(void) {
|
|
491
497
|
_mm_pause();
|
|
492
498
|
}
|
|
499
|
+
#elif defined(__riscv)
|
|
500
|
+
static inline void wsp_ggml_thread_cpu_relax(void) {
|
|
501
|
+
#ifdef __riscv_zihintpause
|
|
502
|
+
__asm__ __volatile__ ("pause");
|
|
503
|
+
#else
|
|
504
|
+
/* Encoding of the pause instruction */
|
|
505
|
+
__asm__ __volatile__ (".4byte 0x100000F");
|
|
506
|
+
#endif
|
|
507
|
+
}
|
|
493
508
|
#else
|
|
494
509
|
static inline void wsp_ggml_thread_cpu_relax(void) {;}
|
|
495
510
|
#endif
|
|
@@ -530,7 +545,7 @@ struct wsp_ggml_state {
|
|
|
530
545
|
static struct wsp_ggml_state g_state = {0};
|
|
531
546
|
|
|
532
547
|
void wsp_ggml_barrier(struct wsp_ggml_threadpool * tp) {
|
|
533
|
-
int n_threads = atomic_load_explicit(&tp->
|
|
548
|
+
int n_threads = atomic_load_explicit(&tp->n_graph, memory_order_relaxed) & WSP_GGML_THREADPOOL_N_THREADS_MASK;
|
|
534
549
|
if (n_threads == 1) {
|
|
535
550
|
return;
|
|
536
551
|
}
|
|
@@ -547,7 +562,7 @@ void wsp_ggml_barrier(struct wsp_ggml_threadpool * tp) {
|
|
|
547
562
|
// last thread
|
|
548
563
|
atomic_store_explicit(&tp->n_barrier, 0, memory_order_relaxed);
|
|
549
564
|
|
|
550
|
-
// exit barrier (
|
|
565
|
+
// exit barrier (full seq-cst fence)
|
|
551
566
|
atomic_fetch_add_explicit(&tp->n_barrier_passed, 1, memory_order_seq_cst);
|
|
552
567
|
return;
|
|
553
568
|
}
|
|
@@ -683,24 +698,25 @@ bool wsp_ggml_is_numa(void) {
|
|
|
683
698
|
}
|
|
684
699
|
|
|
685
700
|
#if defined(__ARM_ARCH)
|
|
686
|
-
|
|
687
|
-
#if defined(__linux__) && defined(__aarch64__)
|
|
688
|
-
#include <sys/auxv.h>
|
|
689
|
-
#endif
|
|
690
|
-
|
|
691
|
-
static void wsp_ggml_init_arm_arch_features(void) {
|
|
692
701
|
#if defined(__aarch64__) && defined(__ARM_FEATURE_SVE)
|
|
693
|
-
#
|
|
694
|
-
|
|
702
|
+
#include <arm_sve.h>
|
|
703
|
+
static void wsp_ggml_init_arm_arch_features(void) {
|
|
704
|
+
wsp_ggml_arm_arch_features.sve_cnt = svcntb();
|
|
705
|
+
}
|
|
695
706
|
#else
|
|
696
|
-
|
|
697
|
-
#error "TODO: SVE is not supported on this platform. To use SVE, sve_cnt needs to be initialized here."
|
|
698
|
-
#endif
|
|
707
|
+
static void wsp_ggml_init_arm_arch_features(void) {}
|
|
699
708
|
#endif
|
|
700
|
-
}
|
|
701
|
-
|
|
702
709
|
#endif // __ARM_ARCH
|
|
703
710
|
|
|
711
|
+
#if defined(__riscv) && defined(__riscv_v_intrinsic)
|
|
712
|
+
#include <riscv_vector.h>
|
|
713
|
+
static void wsp_ggml_init_riscv_arch_features(void) {
|
|
714
|
+
wsp_ggml_riscv_arch_features.rvv_vlen = __riscv_vlenb();
|
|
715
|
+
}
|
|
716
|
+
#else
|
|
717
|
+
static void wsp_ggml_init_riscv_arch_features(void) {}
|
|
718
|
+
#endif
|
|
719
|
+
|
|
704
720
|
struct wsp_ggml_tensor * wsp_ggml_new_i32(struct wsp_ggml_context * ctx, int32_t value) {
|
|
705
721
|
WSP_GGML_ASSERT(!wsp_ggml_get_no_alloc(ctx));
|
|
706
722
|
|
|
@@ -1927,6 +1943,10 @@ static void wsp_ggml_compute_forward(struct wsp_ggml_compute_params * params, st
|
|
|
1927
1943
|
{
|
|
1928
1944
|
wsp_ggml_compute_forward_argsort(params, tensor);
|
|
1929
1945
|
} break;
|
|
1946
|
+
case WSP_GGML_OP_TOP_K:
|
|
1947
|
+
{
|
|
1948
|
+
wsp_ggml_compute_forward_top_k(params, tensor);
|
|
1949
|
+
} break;
|
|
1930
1950
|
case WSP_GGML_OP_LEAKY_RELU:
|
|
1931
1951
|
{
|
|
1932
1952
|
wsp_ggml_compute_forward_leaky_relu(params, tensor);
|
|
@@ -2311,6 +2331,7 @@ static int wsp_ggml_get_n_tasks(struct wsp_ggml_tensor * node, int n_threads) {
|
|
|
2311
2331
|
case WSP_GGML_OP_ARANGE:
|
|
2312
2332
|
case WSP_GGML_OP_TIMESTEP_EMBEDDING:
|
|
2313
2333
|
case WSP_GGML_OP_ARGSORT:
|
|
2334
|
+
case WSP_GGML_OP_TOP_K:
|
|
2314
2335
|
case WSP_GGML_OP_FLASH_ATTN_EXT:
|
|
2315
2336
|
case WSP_GGML_OP_FLASH_ATTN_BACK:
|
|
2316
2337
|
case WSP_GGML_OP_SSM_CONV:
|
|
@@ -2622,7 +2643,7 @@ static void wsp_ggml_thread_cpumask_next(const bool * global_mask, bool * local_
|
|
|
2622
2643
|
void wsp_ggml_threadpool_free(struct wsp_ggml_threadpool* threadpool) {
|
|
2623
2644
|
if (!threadpool) return;
|
|
2624
2645
|
|
|
2625
|
-
const int n_threads = threadpool->
|
|
2646
|
+
const int n_threads = threadpool->n_threads;
|
|
2626
2647
|
|
|
2627
2648
|
#ifndef WSP_GGML_USE_OPENMP
|
|
2628
2649
|
struct wsp_ggml_compute_state* workers = threadpool->workers;
|
|
@@ -2698,9 +2719,14 @@ struct wsp_ggml_cplan wsp_ggml_graph_plan(
|
|
|
2698
2719
|
//WSP_GGML_PRINT_DEBUG("Threadpool is not specified. Will create a disposable threadpool : n_threads %d\n", n_threads);
|
|
2699
2720
|
}
|
|
2700
2721
|
if (n_threads <= 0) {
|
|
2701
|
-
n_threads = threadpool ? threadpool->
|
|
2722
|
+
n_threads = threadpool ? threadpool->n_threads : WSP_GGML_DEFAULT_N_THREADS;
|
|
2702
2723
|
}
|
|
2703
2724
|
|
|
2725
|
+
#if defined(__EMSCRIPTEN__) && !defined(__EMSCRIPTEN_PTHREADS__)
|
|
2726
|
+
// Emscripten without pthreads support can only use a single thread
|
|
2727
|
+
n_threads = 1;
|
|
2728
|
+
#endif
|
|
2729
|
+
|
|
2704
2730
|
size_t work_size = 0;
|
|
2705
2731
|
|
|
2706
2732
|
struct wsp_ggml_cplan cplan;
|
|
@@ -2834,6 +2860,10 @@ struct wsp_ggml_cplan wsp_ggml_graph_plan(
|
|
|
2834
2860
|
cur += sizeof(wsp_ggml_fp16_t)*ne00*ne01*ne02*ne03;
|
|
2835
2861
|
cur += sizeof(wsp_ggml_fp16_t)*ne10*ne11*ne12;
|
|
2836
2862
|
} break;
|
|
2863
|
+
case WSP_GGML_OP_TOP_K:
|
|
2864
|
+
{
|
|
2865
|
+
cur += sizeof(int32_t)*node->src[0]->ne[0]*n_tasks;
|
|
2866
|
+
} break;
|
|
2837
2867
|
case WSP_GGML_OP_FLASH_ATTN_EXT:
|
|
2838
2868
|
{
|
|
2839
2869
|
const int64_t ne10 = node->src[1]->ne[0]; // DK
|
|
@@ -2897,12 +2927,14 @@ static thread_ret_t wsp_ggml_graph_compute_thread(void * data) {
|
|
|
2897
2927
|
|
|
2898
2928
|
struct wsp_ggml_compute_params params = {
|
|
2899
2929
|
/*.ith =*/ state->ith,
|
|
2900
|
-
/*.nth =*/ atomic_load_explicit(&tp->
|
|
2930
|
+
/*.nth =*/ atomic_load_explicit(&tp->n_graph, memory_order_relaxed) & WSP_GGML_THREADPOOL_N_THREADS_MASK,
|
|
2901
2931
|
/*.wsize =*/ cplan->work_size,
|
|
2902
2932
|
/*.wdata =*/ cplan->work_data,
|
|
2903
2933
|
/*.threadpool=*/ tp,
|
|
2904
2934
|
};
|
|
2905
2935
|
|
|
2936
|
+
WSP_GGML_PRINT_DEBUG("thread #%d compute-start cplan %p last-graph %d \n", state->ith, cplan, state->last_graph);
|
|
2937
|
+
|
|
2906
2938
|
for (int node_n = 0; node_n < cgraph->n_nodes && atomic_load_explicit(&tp->abort, memory_order_relaxed) != node_n; node_n++) {
|
|
2907
2939
|
struct wsp_ggml_tensor * node = cgraph->nodes[node_n];
|
|
2908
2940
|
|
|
@@ -2924,6 +2956,8 @@ static thread_ret_t wsp_ggml_graph_compute_thread(void * data) {
|
|
|
2924
2956
|
}
|
|
2925
2957
|
}
|
|
2926
2958
|
|
|
2959
|
+
WSP_GGML_PRINT_DEBUG("thread #%d compute-done cplan %p last-graph %d \n", state->ith, cplan, state->last_graph);
|
|
2960
|
+
|
|
2927
2961
|
wsp_ggml_barrier(state->threadpool);
|
|
2928
2962
|
|
|
2929
2963
|
return 0;
|
|
@@ -2931,27 +2965,23 @@ static thread_ret_t wsp_ggml_graph_compute_thread(void * data) {
|
|
|
2931
2965
|
|
|
2932
2966
|
#ifndef WSP_GGML_USE_OPENMP
|
|
2933
2967
|
|
|
2934
|
-
// check if thread is active
|
|
2935
|
-
static inline bool wsp_ggml_graph_compute_thread_active(struct wsp_ggml_compute_state * state) {
|
|
2936
|
-
struct wsp_ggml_threadpool * threadpool = state->threadpool;
|
|
2937
|
-
int n_threads = atomic_load_explicit(&threadpool->n_threads_cur, memory_order_relaxed);
|
|
2938
|
-
return (state->ith < n_threads);
|
|
2939
|
-
}
|
|
2940
|
-
|
|
2941
2968
|
// check if thread is ready to proceed (exit from polling or sleeping)
|
|
2969
|
+
// returns true if loops should exit, sets state->pending to indicate new work
|
|
2942
2970
|
static inline bool wsp_ggml_graph_compute_thread_ready(struct wsp_ggml_compute_state * state) {
|
|
2943
2971
|
struct wsp_ggml_threadpool * threadpool = state->threadpool;
|
|
2944
2972
|
|
|
2945
2973
|
if (state->pending || threadpool->stop || threadpool->pause) { return true; }
|
|
2946
2974
|
|
|
2947
2975
|
// check for new graph/work
|
|
2948
|
-
int
|
|
2949
|
-
|
|
2950
|
-
|
|
2951
|
-
state->
|
|
2976
|
+
int n_graph = atomic_load_explicit(&threadpool->n_graph, memory_order_relaxed);
|
|
2977
|
+
int n_threads = n_graph & WSP_GGML_THREADPOOL_N_THREADS_MASK;
|
|
2978
|
+
if (n_graph != state->last_graph) {
|
|
2979
|
+
state->pending = (state->ith < n_threads);
|
|
2980
|
+
state->last_graph = n_graph;
|
|
2981
|
+
return true;
|
|
2952
2982
|
}
|
|
2953
2983
|
|
|
2954
|
-
return
|
|
2984
|
+
return false;
|
|
2955
2985
|
}
|
|
2956
2986
|
|
|
2957
2987
|
// sync thread state after polling
|
|
@@ -2968,11 +2998,6 @@ static inline void wsp_ggml_graph_compute_thread_sync(struct wsp_ggml_compute_st
|
|
|
2968
2998
|
static inline bool wsp_ggml_graph_compute_poll_for_work(struct wsp_ggml_compute_state * state) {
|
|
2969
2999
|
struct wsp_ggml_threadpool * threadpool = state->threadpool;
|
|
2970
3000
|
|
|
2971
|
-
// Skip polling for unused threads
|
|
2972
|
-
if (!wsp_ggml_graph_compute_thread_active(state)) {
|
|
2973
|
-
return state->pending;
|
|
2974
|
-
}
|
|
2975
|
-
|
|
2976
3001
|
// This seems to make 0 ... 100 a decent range for polling level across modern processors.
|
|
2977
3002
|
// Perhaps, we can adjust it dynamically based on load and things.
|
|
2978
3003
|
const uint64_t n_rounds = 1024UL * 128 * threadpool->poll;
|
|
@@ -3034,7 +3059,6 @@ static thread_ret_t wsp_ggml_graph_compute_secondary_thread(void* data) {
|
|
|
3034
3059
|
wsp_ggml_graph_compute_check_for_work(state);
|
|
3035
3060
|
if (state->pending) {
|
|
3036
3061
|
state->pending = false;
|
|
3037
|
-
|
|
3038
3062
|
wsp_ggml_graph_compute_thread(state);
|
|
3039
3063
|
}
|
|
3040
3064
|
}
|
|
@@ -3049,14 +3073,15 @@ static void wsp_ggml_graph_compute_kickoff(struct wsp_ggml_threadpool * threadpo
|
|
|
3049
3073
|
|
|
3050
3074
|
wsp_ggml_mutex_lock(&threadpool->mutex);
|
|
3051
3075
|
|
|
3052
|
-
|
|
3076
|
+
// Update the number of active threads and the graph count
|
|
3077
|
+
int n_graph = atomic_load_explicit(&threadpool->n_graph, memory_order_relaxed) >> WSP_GGML_THREADPOOL_N_THREADS_BITS;
|
|
3078
|
+
n_graph = ((n_graph + 1) << WSP_GGML_THREADPOOL_N_THREADS_BITS) | (n_threads & WSP_GGML_THREADPOOL_N_THREADS_MASK);
|
|
3053
3079
|
|
|
3054
|
-
|
|
3055
|
-
atomic_store_explicit(&threadpool->n_threads_cur, n_threads, memory_order_relaxed);
|
|
3080
|
+
WSP_GGML_PRINT_DEBUG("compute-kickoff: n_threads %d n_graph %d\n", n_threads, n_graph);
|
|
3056
3081
|
|
|
3057
3082
|
// Indicate the graph is ready to be processed
|
|
3058
3083
|
// We need the full seq-cst fence here because of the polling threads (used in thread_sync)
|
|
3059
|
-
|
|
3084
|
+
atomic_store_explicit(&threadpool->n_graph, n_graph, memory_order_seq_cst);
|
|
3060
3085
|
|
|
3061
3086
|
if (threadpool->pause) {
|
|
3062
3087
|
// Update main thread prio and affinity to match the threadpool settings
|
|
@@ -3094,8 +3119,7 @@ static struct wsp_ggml_threadpool * wsp_ggml_threadpool_new_impl(
|
|
|
3094
3119
|
threadpool->pause = tpp->paused;
|
|
3095
3120
|
threadpool->abort = -1;
|
|
3096
3121
|
threadpool->workers = NULL;
|
|
3097
|
-
threadpool->
|
|
3098
|
-
threadpool->n_threads_cur = tpp->n_threads;
|
|
3122
|
+
threadpool->n_threads = tpp->n_threads;
|
|
3099
3123
|
threadpool->poll = tpp->poll;
|
|
3100
3124
|
threadpool->prio = tpp->prio;
|
|
3101
3125
|
threadpool->ec = WSP_GGML_STATUS_SUCCESS;
|
|
@@ -3190,7 +3214,7 @@ enum wsp_ggml_status wsp_ggml_graph_compute(struct wsp_ggml_cgraph * cgraph, str
|
|
|
3190
3214
|
{
|
|
3191
3215
|
// update the number of threads from the actual number of threads that we got from OpenMP
|
|
3192
3216
|
n_threads = omp_get_num_threads();
|
|
3193
|
-
atomic_store_explicit(&threadpool->
|
|
3217
|
+
atomic_store_explicit(&threadpool->n_graph, n_threads, memory_order_relaxed);
|
|
3194
3218
|
}
|
|
3195
3219
|
|
|
3196
3220
|
// Apply thread CPU mask and priority
|
|
@@ -3203,13 +3227,13 @@ enum wsp_ggml_status wsp_ggml_graph_compute(struct wsp_ggml_cgraph * cgraph, str
|
|
|
3203
3227
|
wsp_ggml_graph_compute_thread(&threadpool->workers[ith]);
|
|
3204
3228
|
}
|
|
3205
3229
|
} else {
|
|
3206
|
-
atomic_store_explicit(&threadpool->
|
|
3230
|
+
atomic_store_explicit(&threadpool->n_graph, 1, memory_order_relaxed);
|
|
3207
3231
|
wsp_ggml_graph_compute_thread(&threadpool->workers[0]);
|
|
3208
3232
|
}
|
|
3209
3233
|
#else
|
|
3210
|
-
if (n_threads > threadpool->
|
|
3211
|
-
WSP_GGML_LOG_WARN("cplan requested more threads (%d) than available (%d)\n", n_threads, threadpool->
|
|
3212
|
-
n_threads = threadpool->
|
|
3234
|
+
if (n_threads > threadpool->n_threads) {
|
|
3235
|
+
WSP_GGML_LOG_WARN("cplan requested more threads (%d) than available (%d)\n", n_threads, threadpool->n_threads);
|
|
3236
|
+
n_threads = threadpool->n_threads;
|
|
3213
3237
|
}
|
|
3214
3238
|
|
|
3215
3239
|
// Kick all threads to start the new graph
|
|
@@ -3296,13 +3320,33 @@ void wsp_ggml_cpu_fp16_to_fp32(const wsp_ggml_fp16_t * x, float * y, int64_t n)
|
|
|
3296
3320
|
__m128 y_vec = _mm_cvtph_ps(x_vec);
|
|
3297
3321
|
_mm_storeu_ps(y + i, y_vec);
|
|
3298
3322
|
}
|
|
3299
|
-
|
|
3300
|
-
|
|
3301
|
-
|
|
3302
|
-
|
|
3303
|
-
|
|
3304
|
-
|
|
3323
|
+
|
|
3324
|
+
#elif defined(__riscv_v_intrinsic) && defined(__riscv_zvfhmin)
|
|
3325
|
+
// calculate step size
|
|
3326
|
+
const int epr = __riscv_vsetvlmax_e16m2();
|
|
3327
|
+
const int step = epr * 2;
|
|
3328
|
+
const int np = (n & ~(step - 1));
|
|
3329
|
+
|
|
3330
|
+
// unroll by 2
|
|
3331
|
+
for (; i < np; i += step) {
|
|
3332
|
+
vfloat16m2_t ax0 = __riscv_vle16_v_f16m2((const _Float16*)x + i, epr);
|
|
3333
|
+
vfloat32m4_t ay0 = __riscv_vfwcvt_f_f_v_f32m4(ax0, epr);
|
|
3334
|
+
__riscv_vse32_v_f32m4(y + i, ay0, epr);
|
|
3335
|
+
|
|
3336
|
+
vfloat16m2_t ax1 = __riscv_vle16_v_f16m2((const _Float16*)x + i + epr, epr);
|
|
3337
|
+
vfloat32m4_t ay1 = __riscv_vfwcvt_f_f_v_f32m4(ax1, epr);
|
|
3338
|
+
__riscv_vse32_v_f32m4(y + i + epr, ay1, epr);
|
|
3305
3339
|
}
|
|
3340
|
+
|
|
3341
|
+
// leftovers
|
|
3342
|
+
int vl;
|
|
3343
|
+
for (i = np; i < n; i += vl) {
|
|
3344
|
+
vl = __riscv_vsetvl_e16m2(n - i);
|
|
3345
|
+
vfloat16m2_t ax0 = __riscv_vle16_v_f16m2((const _Float16*)x + i, vl);
|
|
3346
|
+
vfloat32m4_t ay0 = __riscv_vfwcvt_f_f_v_f32m4(ax0, vl);
|
|
3347
|
+
__riscv_vse32_v_f32m4(y + i, ay0, vl);
|
|
3348
|
+
}
|
|
3349
|
+
|
|
3306
3350
|
#endif
|
|
3307
3351
|
|
|
3308
3352
|
for (; i < n; ++i) {
|
|
@@ -3347,6 +3391,31 @@ void wsp_ggml_cpu_bf16_to_fp32(const wsp_ggml_bf16_t * x, float * y, int64_t n)
|
|
|
3347
3391
|
(const __m128i *)(x + i))),
|
|
3348
3392
|
16)));
|
|
3349
3393
|
}
|
|
3394
|
+
#elif defined(__riscv_v_intrinsic) && defined(__riscv_zvfbfmin)
|
|
3395
|
+
// calculate step size
|
|
3396
|
+
const int epr = __riscv_vsetvlmax_e16m2();
|
|
3397
|
+
const int step = epr * 2;
|
|
3398
|
+
const int np = (n & ~(step - 1));
|
|
3399
|
+
|
|
3400
|
+
// unroll by 2
|
|
3401
|
+
for (; i < np; i += step) {
|
|
3402
|
+
vbfloat16m2_t ax0 = __riscv_vle16_v_bf16m2((const __bf16*)x + i, epr);
|
|
3403
|
+
vfloat32m4_t ay0 = __riscv_vfwcvtbf16_f_f_v_f32m4(ax0, epr);
|
|
3404
|
+
__riscv_vse32_v_f32m4(y + i, ay0, epr);
|
|
3405
|
+
|
|
3406
|
+
vbfloat16m2_t ax1 = __riscv_vle16_v_bf16m2((const __bf16*)x + i + epr, epr);
|
|
3407
|
+
vfloat32m4_t ay1 = __riscv_vfwcvtbf16_f_f_v_f32m4(ax1, epr);
|
|
3408
|
+
__riscv_vse32_v_f32m4(y + i + epr, ay1, epr);
|
|
3409
|
+
}
|
|
3410
|
+
|
|
3411
|
+
// leftovers
|
|
3412
|
+
int vl;
|
|
3413
|
+
for (i = np; i < n; i += vl) {
|
|
3414
|
+
vl = __riscv_vsetvl_e16m2(n - i);
|
|
3415
|
+
vbfloat16m2_t ax0 = __riscv_vle16_v_bf16m2((const __bf16*)x + i, vl);
|
|
3416
|
+
vfloat32m4_t ay0 = __riscv_vfwcvtbf16_f_f_v_f32m4(ax0, vl);
|
|
3417
|
+
__riscv_vse32_v_f32m4(y + i, ay0, vl);
|
|
3418
|
+
}
|
|
3350
3419
|
#endif
|
|
3351
3420
|
for (; i < n; i++) {
|
|
3352
3421
|
y[i] = WSP_GGML_BF16_TO_FP32(x[i]);
|
|
@@ -3449,6 +3518,14 @@ int wsp_ggml_cpu_has_riscv_v(void) {
|
|
|
3449
3518
|
#endif
|
|
3450
3519
|
}
|
|
3451
3520
|
|
|
3521
|
+
int wsp_ggml_cpu_get_rvv_vlen(void) {
|
|
3522
|
+
#if defined(__riscv) && defined(__riscv_v_intrinsic)
|
|
3523
|
+
return wsp_ggml_riscv_arch_features.rvv_vlen;
|
|
3524
|
+
#else
|
|
3525
|
+
return 0;
|
|
3526
|
+
#endif
|
|
3527
|
+
}
|
|
3528
|
+
|
|
3452
3529
|
int wsp_ggml_cpu_has_f16c(void) {
|
|
3453
3530
|
#if defined(__F16C__)
|
|
3454
3531
|
return 1;
|
|
@@ -3615,6 +3692,10 @@ void wsp_ggml_cpu_init(void) {
|
|
|
3615
3692
|
wsp_ggml_init_arm_arch_features();
|
|
3616
3693
|
#endif
|
|
3617
3694
|
|
|
3695
|
+
#if defined(__riscv)
|
|
3696
|
+
wsp_ggml_init_riscv_arch_features();
|
|
3697
|
+
#endif
|
|
3698
|
+
|
|
3618
3699
|
is_first_call = false;
|
|
3619
3700
|
}
|
|
3620
3701
|
|