whisper.rn 0.4.2 → 0.5.0-rc.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +1 -3
- package/android/build.gradle +70 -11
- package/android/src/main/CMakeLists.txt +28 -1
- package/android/src/main/java/com/rnwhisper/JSCallInvokerResolver.java +40 -0
- package/android/src/main/java/com/rnwhisper/RNWhisper.java +80 -27
- package/android/src/main/java/com/rnwhisper/WhisperContext.java +21 -9
- package/android/src/main/java/com/rnwhisper/WhisperVadContext.java +1 -1
- package/android/src/main/jni.cpp +79 -2
- package/android/src/main/jniLibs/arm64-v8a/librnwhisper.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/librnwhisper_v8fp16_va_2.so +0 -0
- package/android/src/main/jniLibs/armeabi-v7a/librnwhisper.so +0 -0
- package/android/src/main/jniLibs/armeabi-v7a/librnwhisper_vfpv4.so +0 -0
- package/android/src/main/jniLibs/x86_64/librnwhisper.so +0 -0
- package/android/src/main/jniLibs/x86_64/librnwhisper_x86_64.so +0 -0
- package/android/src/newarch/java/com/rnwhisper/RNWhisperModule.java +5 -0
- package/android/src/oldarch/java/com/rnwhisper/RNWhisperModule.java +5 -0
- package/cpp/ggml-backend.cpp +36 -18
- package/cpp/ggml-backend.h +1 -1
- package/cpp/ggml-cpu/amx/mmq.cpp +10 -9
- package/cpp/ggml-cpu/arch/arm/quants.c +109 -108
- package/cpp/ggml-cpu/arch/arm/repack.cpp +13 -12
- package/cpp/ggml-cpu/arch/x86/quants.c +83 -82
- package/cpp/ggml-cpu/arch/x86/repack.cpp +20 -19
- package/cpp/ggml-cpu/common.h +3 -2
- package/cpp/ggml-cpu/ggml-cpu-impl.h +9 -3
- package/cpp/ggml-cpu/ggml-cpu.c +95 -17
- package/cpp/ggml-cpu/ggml-cpu.cpp +4 -0
- package/cpp/ggml-cpu/ops.cpp +775 -74
- package/cpp/ggml-cpu/ops.h +7 -0
- package/cpp/ggml-cpu/quants.c +25 -24
- package/cpp/ggml-cpu/repack.cpp +15 -14
- package/cpp/ggml-cpu/simd-mappings.h +211 -33
- package/cpp/ggml-cpu/vec.cpp +26 -2
- package/cpp/ggml-cpu/vec.h +99 -45
- package/cpp/ggml-cpu.h +2 -0
- package/cpp/ggml-impl.h +125 -183
- package/cpp/ggml-metal-impl.h +27 -0
- package/cpp/ggml-metal.m +298 -41
- package/cpp/ggml-quants.c +6 -6
- package/cpp/ggml-whisper-sim.metallib +0 -0
- package/cpp/ggml-whisper.metallib +0 -0
- package/cpp/ggml.c +269 -40
- package/cpp/ggml.h +122 -2
- package/cpp/gguf.cpp +5 -1
- package/cpp/jsi/RNWhisperJSI.cpp +681 -0
- package/cpp/jsi/RNWhisperJSI.h +44 -0
- package/cpp/jsi/ThreadPool.h +100 -0
- package/cpp/whisper.cpp +4 -0
- package/cpp/whisper.h +2 -0
- package/ios/RNWhisper.h +3 -0
- package/ios/RNWhisper.mm +66 -31
- package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml-backend.h +1 -1
- package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml-cpu.h +2 -0
- package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml-impl.h +125 -183
- package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml-metal-impl.h +27 -0
- package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml.h +122 -2
- package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/whisper.h +2 -0
- package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/ggml-whisper.metallib +0 -0
- package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/rnwhisper +0 -0
- package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-backend.h +1 -1
- package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-cpu.h +2 -0
- package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-impl.h +125 -183
- package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-metal-impl.h +27 -0
- package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml.h +122 -2
- package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/whisper.h +2 -0
- package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/ggml-whisper-sim.metallib +0 -0
- package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/rnwhisper +0 -0
- package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml-backend.h +1 -1
- package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml-cpu.h +2 -0
- package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml-impl.h +125 -183
- package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml-metal-impl.h +27 -0
- package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml.h +122 -2
- package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/whisper.h +2 -0
- package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/ggml-whisper.metallib +0 -0
- package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/rnwhisper +0 -0
- package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-backend.h +1 -1
- package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-cpu.h +2 -0
- package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-impl.h +125 -183
- package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-metal-impl.h +27 -0
- package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml.h +122 -2
- package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/whisper.h +2 -0
- package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/ggml-whisper-sim.metallib +0 -0
- package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/rnwhisper +0 -0
- package/jest/mock.js +1 -0
- package/lib/commonjs/NativeRNWhisper.js.map +1 -1
- package/lib/commonjs/index.js +83 -2
- package/lib/commonjs/index.js.map +1 -1
- package/lib/module/NativeRNWhisper.js.map +1 -1
- package/lib/module/index.js +83 -2
- package/lib/module/index.js.map +1 -1
- package/lib/typescript/NativeRNWhisper.d.ts +4 -0
- package/lib/typescript/NativeRNWhisper.d.ts.map +1 -1
- package/lib/typescript/index.d.ts +18 -6
- package/lib/typescript/index.d.ts.map +1 -1
- package/package.json +2 -3
- package/src/NativeRNWhisper.ts +2 -0
- package/src/index.ts +162 -33
- package/whisper-rn.podspec +6 -3
|
@@ -26,6 +26,11 @@ public class RNWhisperModule extends NativeRNWhisperSpec {
|
|
|
26
26
|
rnwhisper = new RNWhisper(reactContext);
|
|
27
27
|
}
|
|
28
28
|
|
|
29
|
+
@ReactMethod
|
|
30
|
+
public void installJSIBindings(Promise promise) {
|
|
31
|
+
rnwhisper.installJSIBindings(promise);
|
|
32
|
+
}
|
|
33
|
+
|
|
29
34
|
@Override
|
|
30
35
|
@NonNull
|
|
31
36
|
public String getName() {
|
|
@@ -26,6 +26,11 @@ public class RNWhisperModule extends ReactContextBaseJavaModule {
|
|
|
26
26
|
rnwhisper = new RNWhisper(reactContext);
|
|
27
27
|
}
|
|
28
28
|
|
|
29
|
+
@ReactMethod
|
|
30
|
+
public void installJSIBindings(Promise promise) {
|
|
31
|
+
rnwhisper.installJSIBindings(promise);
|
|
32
|
+
}
|
|
33
|
+
|
|
29
34
|
@Override
|
|
30
35
|
@NonNull
|
|
31
36
|
public String getName() {
|
package/cpp/ggml-backend.cpp
CHANGED
|
@@ -817,8 +817,9 @@ static void wsp_ggml_backend_sched_print_assignments(wsp_ggml_backend_sched_t sc
|
|
|
817
817
|
}
|
|
818
818
|
if (sched->debug > 1) {
|
|
819
819
|
wsp_ggml_backend_t tensor_backend = wsp_ggml_backend_sched_get_tensor_backend(sched, node);
|
|
820
|
-
WSP_GGML_LOG_DEBUG("node #%3d (%10.10s): %20.20s (%5.5s) [%5.5s %8.8s]:", i, wsp_ggml_op_name(node->op), node->name,
|
|
821
|
-
fmt_size(wsp_ggml_nbytes(node)), tensor_backend ? wsp_ggml_backend_name(tensor_backend) : "NULL", GET_CAUSE(node)
|
|
820
|
+
WSP_GGML_LOG_DEBUG("node #%3d (%10.10s): %20.20s (%5.5s) [%5.5s %8.8s] use=%d:", i, wsp_ggml_op_name(node->op), node->name,
|
|
821
|
+
fmt_size(wsp_ggml_nbytes(node)), tensor_backend ? wsp_ggml_backend_name(tensor_backend) : "NULL", GET_CAUSE(node),
|
|
822
|
+
graph->use_counts[wsp_ggml_hash_find(&graph->visited_hash_set, node)]);
|
|
822
823
|
for (int j = 0; j < WSP_GGML_MAX_SRC; j++) {
|
|
823
824
|
struct wsp_ggml_tensor * src = node->src[j];
|
|
824
825
|
if (src == NULL) {
|
|
@@ -1826,7 +1827,7 @@ void wsp_ggml_backend_graph_copy_free(struct wsp_ggml_backend_graph_copy copy) {
|
|
|
1826
1827
|
wsp_ggml_free(copy.ctx_unallocated);
|
|
1827
1828
|
}
|
|
1828
1829
|
|
|
1829
|
-
bool wsp_ggml_backend_compare_graph_backend(wsp_ggml_backend_t backend1, wsp_ggml_backend_t backend2, struct wsp_ggml_cgraph * graph, wsp_ggml_backend_eval_callback callback, void * user_data) {
|
|
1830
|
+
bool wsp_ggml_backend_compare_graph_backend(wsp_ggml_backend_t backend1, wsp_ggml_backend_t backend2, struct wsp_ggml_cgraph * graph, wsp_ggml_backend_eval_callback callback, void * user_data, struct wsp_ggml_tensor * test_node) {
|
|
1830
1831
|
struct wsp_ggml_backend_graph_copy copy = wsp_ggml_backend_graph_copy(backend2, graph);
|
|
1831
1832
|
if (copy.buffer == NULL) {
|
|
1832
1833
|
return false;
|
|
@@ -1837,28 +1838,45 @@ bool wsp_ggml_backend_compare_graph_backend(wsp_ggml_backend_t backend1, wsp_ggm
|
|
|
1837
1838
|
|
|
1838
1839
|
assert(g1->n_nodes == g2->n_nodes);
|
|
1839
1840
|
|
|
1840
|
-
|
|
1841
|
-
|
|
1842
|
-
|
|
1841
|
+
if (test_node != nullptr) {
|
|
1842
|
+
// Compute the whole graph and only test the output for a specific tensor
|
|
1843
|
+
wsp_ggml_backend_graph_compute(backend1, g1);
|
|
1844
|
+
wsp_ggml_backend_graph_compute(backend2, g2);
|
|
1843
1845
|
|
|
1844
|
-
|
|
1846
|
+
int test_node_idx = -1;
|
|
1847
|
+
for (int i = 0; i < g1->n_nodes; i++) {
|
|
1848
|
+
struct wsp_ggml_tensor * t1 = g1->nodes[i];
|
|
1849
|
+
if (t1 == test_node) {
|
|
1850
|
+
test_node_idx = i;
|
|
1851
|
+
break;
|
|
1852
|
+
}
|
|
1853
|
+
}
|
|
1854
|
+
WSP_GGML_ASSERT(test_node_idx != -1);
|
|
1845
1855
|
|
|
1846
|
-
|
|
1847
|
-
|
|
1856
|
+
callback(test_node_idx, g1->nodes[test_node_idx], g2->nodes[test_node_idx], user_data);
|
|
1857
|
+
} else {
|
|
1858
|
+
for (int i = 0; i < g1->n_nodes; i++) {
|
|
1859
|
+
struct wsp_ggml_tensor * t1 = g1->nodes[i];
|
|
1860
|
+
struct wsp_ggml_tensor * t2 = g2->nodes[i];
|
|
1848
1861
|
|
|
1849
|
-
|
|
1850
|
-
wsp_ggml_backend_graph_compute(backend2, &g2v);
|
|
1862
|
+
assert(t1->op == t2->op && wsp_ggml_are_same_layout(t1, t2));
|
|
1851
1863
|
|
|
1852
|
-
|
|
1853
|
-
|
|
1854
|
-
}
|
|
1864
|
+
struct wsp_ggml_cgraph g1v = wsp_ggml_graph_view(g1, i, i + 1);
|
|
1865
|
+
struct wsp_ggml_cgraph g2v = wsp_ggml_graph_view(g2, i, i + 1);
|
|
1855
1866
|
|
|
1856
|
-
|
|
1857
|
-
|
|
1858
|
-
|
|
1867
|
+
wsp_ggml_backend_graph_compute(backend1, &g1v);
|
|
1868
|
+
wsp_ggml_backend_graph_compute(backend2, &g2v);
|
|
1869
|
+
|
|
1870
|
+
if (wsp_ggml_is_view_op(t1->op)) {
|
|
1871
|
+
continue;
|
|
1872
|
+
}
|
|
1873
|
+
|
|
1874
|
+
// compare results, calculate rms etc
|
|
1875
|
+
if (!callback(i, t1, t2, user_data)) {
|
|
1876
|
+
break;
|
|
1877
|
+
}
|
|
1859
1878
|
}
|
|
1860
1879
|
}
|
|
1861
|
-
|
|
1862
1880
|
wsp_ggml_backend_graph_copy_free(copy);
|
|
1863
1881
|
|
|
1864
1882
|
return true;
|
package/cpp/ggml-backend.h
CHANGED
|
@@ -339,7 +339,7 @@ extern "C" {
|
|
|
339
339
|
typedef bool (*wsp_ggml_backend_eval_callback)(int node_index, struct wsp_ggml_tensor * t1, struct wsp_ggml_tensor * t2, void * user_data);
|
|
340
340
|
|
|
341
341
|
// Compare the output of two backends
|
|
342
|
-
WSP_GGML_API bool wsp_ggml_backend_compare_graph_backend(wsp_ggml_backend_t backend1, wsp_ggml_backend_t backend2, struct wsp_ggml_cgraph * graph, wsp_ggml_backend_eval_callback callback, void * user_data);
|
|
342
|
+
WSP_GGML_API bool wsp_ggml_backend_compare_graph_backend(wsp_ggml_backend_t backend1, wsp_ggml_backend_t backend2, struct wsp_ggml_cgraph * graph, wsp_ggml_backend_eval_callback callback, void * user_data, struct wsp_ggml_tensor * test_node);
|
|
343
343
|
|
|
344
344
|
// Tensor initialization
|
|
345
345
|
WSP_GGML_API enum wsp_ggml_status wsp_ggml_backend_tensor_alloc(wsp_ggml_backend_buffer_t buffer, struct wsp_ggml_tensor * tensor, void * addr);
|
package/cpp/ggml-cpu/amx/mmq.cpp
CHANGED
|
@@ -8,6 +8,7 @@
|
|
|
8
8
|
#include "mmq.h"
|
|
9
9
|
#include "ggml-impl.h"
|
|
10
10
|
#include "ggml-cpu-impl.h"
|
|
11
|
+
#include "simd-mappings.h"
|
|
11
12
|
#include "quants.h"
|
|
12
13
|
#include "ggml-quants.h"
|
|
13
14
|
#include <algorithm>
|
|
@@ -453,7 +454,7 @@ void wsp_quantize_row_q8_K_vnni(const float * RESTRICT x, void * RESTRICT vy, in
|
|
|
453
454
|
|
|
454
455
|
// Quantize these floats
|
|
455
456
|
const float iscale = 127.f / amax;
|
|
456
|
-
y[i].d =
|
|
457
|
+
y[i].d = WSP_GGML_CPU_FP32_TO_FP16(1 / iscale);
|
|
457
458
|
const float id = ( amax != 0.0f ) ? iscale : 0.f;
|
|
458
459
|
const __m512 vscale = _mm512_set1_ps(id);
|
|
459
460
|
|
|
@@ -1090,7 +1091,7 @@ struct acc_C<block_q8_0, block_q4_0, is_acc> {
|
|
|
1090
1091
|
const __m512 vd0 = _mm512_cvtph_ps(_mm256_loadu_si256((const __m256i *)((const char *)packed_B + offset)));
|
|
1091
1092
|
|
|
1092
1093
|
for (int m = 0; m < nr; ++m) {
|
|
1093
|
-
const __m512 vd1 = _mm512_set1_ps(
|
|
1094
|
+
const __m512 vd1 = _mm512_set1_ps(WSP_GGML_CPU_FP16_TO_FP32(A[m * lda].d));
|
|
1094
1095
|
const __m512 vtile = _mm512_cvtepi32_ps(_mm512_loadu_si512(tile + m * TILE_N));
|
|
1095
1096
|
|
|
1096
1097
|
__m512 vsum;
|
|
@@ -1113,8 +1114,8 @@ struct acc_C<block_q8_1, block_q4_1, is_acc> {
|
|
|
1113
1114
|
const __m512 vm0 = _mm512_cvtph_ps(_mm256_loadu_si256((const __m256i *)((const char *)packed_B + offset + TILE_N * sizeof(wsp_ggml_half))));
|
|
1114
1115
|
|
|
1115
1116
|
for (int m = 0; m < nr; ++m) {
|
|
1116
|
-
const __m512 vd1 = _mm512_set1_ps(
|
|
1117
|
-
const __m512 vs1 = _mm512_set1_ps(
|
|
1117
|
+
const __m512 vd1 = _mm512_set1_ps(WSP_GGML_CPU_FP16_TO_FP32(A[m * lda].d));
|
|
1118
|
+
const __m512 vs1 = _mm512_set1_ps(WSP_GGML_CPU_FP16_TO_FP32(A[m * lda].s));
|
|
1118
1119
|
const __m512 vtile = _mm512_cvtepi32_ps(_mm512_loadu_si512(tile + m * TILE_N));
|
|
1119
1120
|
|
|
1120
1121
|
__m512 vsum;
|
|
@@ -1137,7 +1138,7 @@ struct acc_C<block_q8_0, block_q8_0, is_acc> {
|
|
|
1137
1138
|
const __m512 vd0 = _mm512_cvtph_ps(_mm256_loadu_si256((const __m256i *)((const char *)packed_B + offset)));
|
|
1138
1139
|
|
|
1139
1140
|
for (int m = 0; m < nr; ++m) {
|
|
1140
|
-
const __m512 vd1 = _mm512_set1_ps(
|
|
1141
|
+
const __m512 vd1 = _mm512_set1_ps(WSP_GGML_CPU_FP16_TO_FP32(A[m * lda].d));
|
|
1141
1142
|
const __m512 vtile = _mm512_cvtepi32_ps(_mm512_loadu_si512(tile + m * TILE_N));
|
|
1142
1143
|
|
|
1143
1144
|
__m512 vsum;
|
|
@@ -1437,7 +1438,7 @@ struct tinygemm_kernel_vnni<block_q8_0, block_q4_0, float, BLOCK_M, BLOCK_N, BLO
|
|
|
1437
1438
|
va[k] = _mm512_set1_epi32(a_ptr[k]);
|
|
1438
1439
|
vcomp = _mm512_dpbusd_epi32(vcomp, off, va[k]);
|
|
1439
1440
|
}
|
|
1440
|
-
vd1 = _mm512_set1_ps(
|
|
1441
|
+
vd1 = _mm512_set1_ps(WSP_GGML_CPU_FP16_TO_FP32(A[0 * KB + i].d));
|
|
1441
1442
|
}
|
|
1442
1443
|
|
|
1443
1444
|
// load b
|
|
@@ -1498,8 +1499,8 @@ struct tinygemm_kernel_vnni<block_q8_1, block_q4_1, float, 1, BLOCK_N, BLOCK_K>
|
|
|
1498
1499
|
for (int k = 0; k < 8; ++k) {
|
|
1499
1500
|
va[k] = _mm512_set1_epi32(a_ptr[k]);
|
|
1500
1501
|
}
|
|
1501
|
-
vd1 = _mm512_set1_ps(
|
|
1502
|
-
vs1 = _mm512_set1_ps(
|
|
1502
|
+
vd1 = _mm512_set1_ps(WSP_GGML_CPU_FP16_TO_FP32(A[0 * KB + i].d));
|
|
1503
|
+
vs1 = _mm512_set1_ps(WSP_GGML_CPU_FP16_TO_FP32(A[0 * KB + i].s));
|
|
1503
1504
|
}
|
|
1504
1505
|
|
|
1505
1506
|
// load b
|
|
@@ -1571,7 +1572,7 @@ struct tinygemm_kernel_vnni<block_q8_0, block_q8_0, float, BLOCK_M, BLOCK_N, BLO
|
|
|
1571
1572
|
va[k] = _mm512_set1_epi32(a_ptr[k]);
|
|
1572
1573
|
va[k] = _mm512_add_epi8(va[k], off);
|
|
1573
1574
|
}
|
|
1574
|
-
vd1 = _mm512_set1_ps(
|
|
1575
|
+
vd1 = _mm512_set1_ps(WSP_GGML_CPU_FP16_TO_FP32(A[0 * KB + i].d));
|
|
1575
1576
|
}
|
|
1576
1577
|
|
|
1577
1578
|
// load b
|