whisper.rn 0.4.1 → 0.4.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/android/src/main/java/com/rnwhisper/RNWhisper.java +24 -18
- package/android/src/main/java/com/rnwhisper/WhisperVadContext.java +1 -57
- package/android/src/main/jniLibs/arm64-v8a/librnwhisper.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/librnwhisper_v8fp16_va_2.so +0 -0
- package/android/src/main/jniLibs/armeabi-v7a/librnwhisper.so +0 -0
- package/android/src/main/jniLibs/armeabi-v7a/librnwhisper_vfpv4.so +0 -0
- package/android/src/main/jniLibs/x86_64/librnwhisper.so +0 -0
- package/android/src/main/jniLibs/x86_64/librnwhisper_x86_64.so +0 -0
- package/cpp/ggml-backend.cpp +36 -18
- package/cpp/ggml-backend.h +1 -1
- package/cpp/ggml-cpu/amx/mmq.cpp +10 -9
- package/cpp/ggml-cpu/arch/arm/quants.c +109 -108
- package/cpp/ggml-cpu/arch/arm/repack.cpp +13 -12
- package/cpp/ggml-cpu/arch/x86/quants.c +83 -82
- package/cpp/ggml-cpu/arch/x86/repack.cpp +20 -19
- package/cpp/ggml-cpu/common.h +3 -2
- package/cpp/ggml-cpu/ggml-cpu-impl.h +9 -3
- package/cpp/ggml-cpu/ggml-cpu.c +95 -17
- package/cpp/ggml-cpu/ggml-cpu.cpp +4 -0
- package/cpp/ggml-cpu/ops.cpp +775 -74
- package/cpp/ggml-cpu/ops.h +7 -0
- package/cpp/ggml-cpu/quants.c +25 -24
- package/cpp/ggml-cpu/repack.cpp +15 -14
- package/cpp/ggml-cpu/simd-mappings.h +211 -33
- package/cpp/ggml-cpu/vec.cpp +26 -2
- package/cpp/ggml-cpu/vec.h +99 -45
- package/cpp/ggml-cpu.h +2 -0
- package/cpp/ggml-impl.h +125 -183
- package/cpp/ggml-metal-impl.h +27 -0
- package/cpp/ggml-metal.m +298 -41
- package/cpp/ggml-quants.c +6 -6
- package/cpp/ggml-whisper-sim.metallib +0 -0
- package/cpp/ggml-whisper.metallib +0 -0
- package/cpp/ggml.c +269 -40
- package/cpp/ggml.h +122 -2
- package/cpp/gguf.cpp +5 -1
- package/cpp/whisper.cpp +4 -0
- package/cpp/whisper.h +2 -0
- package/ios/RNWhisper.mm +35 -38
- package/ios/RNWhisperVadContext.h +1 -1
- package/ios/RNWhisperVadContext.mm +2 -6
- package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml-backend.h +1 -1
- package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml-cpu.h +2 -0
- package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml-impl.h +125 -183
- package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml-metal-impl.h +27 -0
- package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml.h +122 -2
- package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/whisper.h +2 -0
- package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/ggml-whisper.metallib +0 -0
- package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/rnwhisper +0 -0
- package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-backend.h +1 -1
- package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-cpu.h +2 -0
- package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-impl.h +125 -183
- package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-metal-impl.h +27 -0
- package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml.h +122 -2
- package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/whisper.h +2 -0
- package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/ggml-whisper-sim.metallib +0 -0
- package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/rnwhisper +0 -0
- package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml-backend.h +1 -1
- package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml-cpu.h +2 -0
- package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml-impl.h +125 -183
- package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml-metal-impl.h +27 -0
- package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml.h +122 -2
- package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/whisper.h +2 -0
- package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/ggml-whisper.metallib +0 -0
- package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/rnwhisper +0 -0
- package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-backend.h +1 -1
- package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-cpu.h +2 -0
- package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-impl.h +125 -183
- package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-metal-impl.h +27 -0
- package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml.h +122 -2
- package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/whisper.h +2 -0
- package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/ggml-whisper-sim.metallib +0 -0
- package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/rnwhisper +0 -0
- package/package.json +1 -1
|
@@ -6,6 +6,7 @@
|
|
|
6
6
|
#include "ggml-impl.h"
|
|
7
7
|
#include "ggml-cpu.h"
|
|
8
8
|
#include "ggml-cpu-impl.h"
|
|
9
|
+
#include "simd-mappings.h"
|
|
9
10
|
#include "traits.h"
|
|
10
11
|
|
|
11
12
|
#include <cmath>
|
|
@@ -51,7 +52,7 @@ void wsp_ggml_wsp_quantize_mat_q8_0_4x4(const float * WSP_GGML_RESTRICT x, void
|
|
|
51
52
|
const float d = amax / ((1 << 7) - 1);
|
|
52
53
|
id[row_iter] = d ? 1.0f / d : 0.0f;
|
|
53
54
|
|
|
54
|
-
y[i].d[row_iter] =
|
|
55
|
+
y[i].d[row_iter] = WSP_GGML_CPU_FP32_TO_FP16(d);
|
|
55
56
|
}
|
|
56
57
|
|
|
57
58
|
for (int j = 0; j < 8; j++) {
|
|
@@ -102,7 +103,7 @@ void wsp_ggml_wsp_quantize_mat_q8_0_4x4(const float * WSP_GGML_RESTRICT x, void
|
|
|
102
103
|
const float d = amax / ((1 << 7) - 1);
|
|
103
104
|
id[row_iter] = d ? 1.0f / d : 0.0f;
|
|
104
105
|
|
|
105
|
-
y[i].d[row_iter] =
|
|
106
|
+
y[i].d[row_iter] = WSP_GGML_CPU_FP32_TO_FP16(d);
|
|
106
107
|
}
|
|
107
108
|
|
|
108
109
|
for (int j = 0; j < QK8_0 * 4; j++) {
|
|
@@ -145,7 +146,7 @@ void wsp_ggml_wsp_quantize_mat_q8_0_4x8(const float * WSP_GGML_RESTRICT x, void
|
|
|
145
146
|
const float d = amax / ((1 << 7) - 1);
|
|
146
147
|
id[row_iter] = d ? 1.0f / d : 0.0f;
|
|
147
148
|
|
|
148
|
-
y[i].d[row_iter] =
|
|
149
|
+
y[i].d[row_iter] = WSP_GGML_CPU_FP32_TO_FP16(d);
|
|
149
150
|
}
|
|
150
151
|
|
|
151
152
|
for (int j = 0; j < 4; j++) {
|
|
@@ -221,7 +222,7 @@ void wsp_ggml_wsp_quantize_mat_q8_0_4x8(const float * WSP_GGML_RESTRICT x, void
|
|
|
221
222
|
const float d = amax / ((1 << 7) - 1);
|
|
222
223
|
id[row_iter] = d ? 1.0f / d : 0.0f;
|
|
223
224
|
|
|
224
|
-
y[i].d[row_iter] =
|
|
225
|
+
y[i].d[row_iter] = WSP_GGML_CPU_FP32_TO_FP16(d);
|
|
225
226
|
}
|
|
226
227
|
|
|
227
228
|
for (int j = 0; j < QK8_0 * 4; j++) {
|
|
@@ -311,7 +312,7 @@ void wsp_ggml_gemv_q4_0_4x4_q8_0(int n, float * WSP_GGML_RESTRICT s, size_t bs,
|
|
|
311
312
|
const int v1 = (int8_t) (b_ptr[l].qs[k * ncols_interleaved * blocklen + j * blocklen + i] & 0xF0);
|
|
312
313
|
sumi += ((v0 * a_ptr[l].qs[k * blocklen + i]) + (v1 * a_ptr[l].qs[k * blocklen + i + qk / 2])) >> 4;
|
|
313
314
|
}
|
|
314
|
-
sumf[j] += sumi *
|
|
315
|
+
sumf[j] += sumi * WSP_GGML_CPU_FP16_TO_FP32(b_ptr[l].d[j]) * WSP_GGML_CPU_FP16_TO_FP32(a_ptr[l].d);
|
|
315
316
|
}
|
|
316
317
|
}
|
|
317
318
|
}
|
|
@@ -399,7 +400,7 @@ void wsp_ggml_gemv_q4_0_4x8_q8_0(int n, float * WSP_GGML_RESTRICT s, size_t bs,
|
|
|
399
400
|
const int v1 = (int8_t) (b_ptr[l].qs[k * ncols_interleaved * blocklen + j * blocklen + i] & 0xF0);
|
|
400
401
|
sumi += ((v0 * a_ptr[l].qs[k * blocklen + i]) + (v1 * a_ptr[l].qs[k * blocklen + i + qk / 2])) >> 4;
|
|
401
402
|
}
|
|
402
|
-
sumf[j] += sumi *
|
|
403
|
+
sumf[j] += sumi * WSP_GGML_CPU_FP16_TO_FP32(b_ptr[l].d[j]) * WSP_GGML_CPU_FP16_TO_FP32(a_ptr[l].d);
|
|
403
404
|
}
|
|
404
405
|
}
|
|
405
406
|
}
|
|
@@ -514,7 +515,7 @@ void wsp_ggml_gemv_q4_0_8x8_q8_0(int n, float * WSP_GGML_RESTRICT s, size_t bs,
|
|
|
514
515
|
const int v1 = (int8_t) (b_ptr[l].qs[k * ncols_interleaved * blocklen + j * blocklen + i] & 0xF0);
|
|
515
516
|
sumi += ((v0 * a_ptr[l].qs[k * blocklen + i]) + (v1 * a_ptr[l].qs[k * blocklen + i + qk / 2])) >> 4;
|
|
516
517
|
}
|
|
517
|
-
sumf[j] += sumi *
|
|
518
|
+
sumf[j] += sumi * WSP_GGML_CPU_FP16_TO_FP32(b_ptr[l].d[j]) * WSP_GGML_CPU_FP16_TO_FP32(a_ptr[l].d);
|
|
518
519
|
}
|
|
519
520
|
}
|
|
520
521
|
}
|
|
@@ -608,7 +609,7 @@ void wsp_ggml_gemv_iq4_nl_4x4_q8_0(int n, float * WSP_GGML_RESTRICT s, size_t bs
|
|
|
608
609
|
const int v1 = kvalues_iq4nl[b_ptr[l].qs[k * ncols_interleaved * blocklen + j * blocklen + i] >> 4];
|
|
609
610
|
sumi += ((v0 * a_ptr[l].qs[k * blocklen + i]) + (v1 * a_ptr[l].qs[k * blocklen + i + qk / 2]));
|
|
610
611
|
}
|
|
611
|
-
sumf[j] += sumi *
|
|
612
|
+
sumf[j] += sumi * WSP_GGML_CPU_FP16_TO_FP32(b_ptr[l].d[j]) * WSP_GGML_CPU_FP16_TO_FP32(a_ptr[l].d);
|
|
612
613
|
}
|
|
613
614
|
}
|
|
614
615
|
}
|
|
@@ -1117,7 +1118,7 @@ void wsp_ggml_gemm_q4_0_4x4_q8_0(int n, float * WSP_GGML_RESTRICT s, size_t bs,
|
|
|
1117
1118
|
sumi += ((v0 * a_ptr[l].qs[k * 4 * blocklen + m * blocklen + i]) +
|
|
1118
1119
|
(v1 * a_ptr[l].qs[k * 4 * blocklen + m * blocklen + i + qk / 2 * 4])) >> 4;
|
|
1119
1120
|
}
|
|
1120
|
-
sumf[m][j] += sumi *
|
|
1121
|
+
sumf[m][j] += sumi * WSP_GGML_CPU_FP16_TO_FP32(b_ptr[l].d[j]) * WSP_GGML_CPU_FP16_TO_FP32(a_ptr[l].d[m]);
|
|
1121
1122
|
}
|
|
1122
1123
|
}
|
|
1123
1124
|
}
|
|
@@ -1570,7 +1571,7 @@ void wsp_ggml_gemm_q4_0_4x8_q8_0(int n, float * WSP_GGML_RESTRICT s, size_t bs,
|
|
|
1570
1571
|
sumi += ((v0 * a_ptr[l].qs[k * 4 * blocklen + m * blocklen + i]) +
|
|
1571
1572
|
(v1 * a_ptr[l].qs[k * 4 * blocklen + m * blocklen + i + qk / 2 * 4])) >> 4;
|
|
1572
1573
|
}
|
|
1573
|
-
sumf[m][j] += sumi *
|
|
1574
|
+
sumf[m][j] += sumi * WSP_GGML_CPU_FP16_TO_FP32(b_ptr[l].d[j]) * WSP_GGML_CPU_FP16_TO_FP32(a_ptr[l].d[m]);
|
|
1574
1575
|
}
|
|
1575
1576
|
}
|
|
1576
1577
|
}
|
|
@@ -2039,7 +2040,7 @@ void wsp_ggml_gemm_q4_0_8x8_q8_0(int n, float * WSP_GGML_RESTRICT s, size_t bs,
|
|
|
2039
2040
|
sumi += ((v0 * a_ptr[l].qs[k * 4 * blocklen + m * blocklen + i]) +
|
|
2040
2041
|
(v1 * a_ptr[l].qs[k * 4 * blocklen + m * blocklen + i + qk / 2 * 4])) >> 4;
|
|
2041
2042
|
}
|
|
2042
|
-
sumf[m][j] += sumi *
|
|
2043
|
+
sumf[m][j] += sumi * WSP_GGML_CPU_FP16_TO_FP32(b_ptr[l].d[j]) * WSP_GGML_CPU_FP16_TO_FP32(a_ptr[l].d[m]);
|
|
2043
2044
|
}
|
|
2044
2045
|
}
|
|
2045
2046
|
}
|
|
@@ -2147,7 +2148,7 @@ void wsp_ggml_gemm_iq4_nl_4x4_q8_0(int n, float * WSP_GGML_RESTRICT s, size_t bs
|
|
|
2147
2148
|
sumi += ((v0 * a_ptr[l].qs[k * 4 * blocklen + m * blocklen + i]) +
|
|
2148
2149
|
(v1 * a_ptr[l].qs[k * 4 * blocklen + m * blocklen + i + qk / 2 * 4]));
|
|
2149
2150
|
}
|
|
2150
|
-
sumf[m][j] += sumi *
|
|
2151
|
+
sumf[m][j] += sumi * WSP_GGML_CPU_FP16_TO_FP32(b_ptr[l].d[j]) * WSP_GGML_CPU_FP16_TO_FP32(a_ptr[l].d[m]);
|
|
2151
2152
|
}
|
|
2152
2153
|
}
|
|
2153
2154
|
}
|