whisper.rn 0.4.0-rc.9 → 0.4.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +74 -1
- package/android/build.gradle +12 -3
- package/android/src/main/CMakeLists.txt +43 -13
- package/android/src/main/java/com/rnwhisper/RNWhisper.java +211 -0
- package/android/src/main/java/com/rnwhisper/WhisperContext.java +64 -36
- package/android/src/main/java/com/rnwhisper/WhisperVadContext.java +157 -0
- package/android/src/main/jni.cpp +205 -0
- package/android/src/main/jniLibs/arm64-v8a/librnwhisper.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/librnwhisper_v8fp16_va_2.so +0 -0
- package/android/src/main/jniLibs/armeabi-v7a/librnwhisper.so +0 -0
- package/android/src/main/jniLibs/armeabi-v7a/librnwhisper_vfpv4.so +0 -0
- package/android/src/main/jniLibs/x86_64/librnwhisper.so +0 -0
- package/android/src/main/jniLibs/x86_64/librnwhisper_x86_64.so +0 -0
- package/android/src/newarch/java/com/rnwhisper/RNWhisperModule.java +26 -0
- package/android/src/oldarch/java/com/rnwhisper/RNWhisperModule.java +26 -0
- package/cpp/coreml/whisper-compat.h +10 -0
- package/cpp/coreml/whisper-compat.m +35 -0
- package/cpp/coreml/whisper-decoder-impl.h +27 -15
- package/cpp/coreml/whisper-decoder-impl.m +36 -10
- package/cpp/coreml/whisper-encoder-impl.h +21 -9
- package/cpp/coreml/whisper-encoder-impl.m +29 -3
- package/cpp/ggml-alloc.c +39 -37
- package/cpp/ggml-alloc.h +1 -1
- package/cpp/ggml-backend-impl.h +55 -27
- package/cpp/ggml-backend-reg.cpp +591 -0
- package/cpp/ggml-backend.cpp +336 -955
- package/cpp/ggml-backend.h +70 -42
- package/cpp/ggml-common.h +57 -49
- package/cpp/ggml-cpp.h +39 -0
- package/cpp/ggml-cpu/amx/amx.cpp +221 -0
- package/cpp/ggml-cpu/amx/amx.h +8 -0
- package/cpp/ggml-cpu/amx/common.h +91 -0
- package/cpp/ggml-cpu/amx/mmq.cpp +2511 -0
- package/cpp/ggml-cpu/amx/mmq.h +10 -0
- package/cpp/ggml-cpu/arch/arm/cpu-feats.cpp +94 -0
- package/cpp/ggml-cpu/arch/arm/quants.c +4113 -0
- package/cpp/ggml-cpu/arch/arm/repack.cpp +2162 -0
- package/cpp/ggml-cpu/arch/x86/cpu-feats.cpp +327 -0
- package/cpp/ggml-cpu/arch/x86/quants.c +4310 -0
- package/cpp/ggml-cpu/arch/x86/repack.cpp +3284 -0
- package/cpp/ggml-cpu/arch-fallback.h +184 -0
- package/cpp/ggml-cpu/binary-ops.cpp +158 -0
- package/cpp/ggml-cpu/binary-ops.h +16 -0
- package/cpp/ggml-cpu/common.h +72 -0
- package/cpp/ggml-cpu/ggml-cpu-impl.h +511 -0
- package/cpp/ggml-cpu/ggml-cpu.c +3473 -0
- package/cpp/ggml-cpu/ggml-cpu.cpp +671 -0
- package/cpp/ggml-cpu/ops.cpp +9085 -0
- package/cpp/ggml-cpu/ops.h +111 -0
- package/cpp/ggml-cpu/quants.c +1157 -0
- package/cpp/ggml-cpu/quants.h +89 -0
- package/cpp/ggml-cpu/repack.cpp +1570 -0
- package/cpp/ggml-cpu/repack.h +98 -0
- package/cpp/ggml-cpu/simd-mappings.h +1006 -0
- package/cpp/ggml-cpu/traits.cpp +36 -0
- package/cpp/ggml-cpu/traits.h +38 -0
- package/cpp/ggml-cpu/unary-ops.cpp +186 -0
- package/cpp/ggml-cpu/unary-ops.h +28 -0
- package/cpp/ggml-cpu/vec.cpp +321 -0
- package/cpp/ggml-cpu/vec.h +973 -0
- package/cpp/ggml-cpu.h +143 -0
- package/cpp/ggml-impl.h +417 -23
- package/cpp/ggml-metal-impl.h +622 -0
- package/cpp/ggml-metal.h +9 -9
- package/cpp/ggml-metal.m +3451 -1344
- package/cpp/ggml-opt.cpp +1037 -0
- package/cpp/ggml-opt.h +237 -0
- package/cpp/ggml-quants.c +296 -10818
- package/cpp/ggml-quants.h +78 -125
- package/cpp/ggml-threading.cpp +12 -0
- package/cpp/ggml-threading.h +14 -0
- package/cpp/ggml-whisper-sim.metallib +0 -0
- package/cpp/ggml-whisper.metallib +0 -0
- package/cpp/ggml.c +4633 -21450
- package/cpp/ggml.h +320 -661
- package/cpp/gguf.cpp +1347 -0
- package/cpp/gguf.h +202 -0
- package/cpp/rn-whisper.cpp +4 -11
- package/cpp/whisper-arch.h +197 -0
- package/cpp/whisper.cpp +2022 -495
- package/cpp/whisper.h +75 -18
- package/ios/CMakeLists.txt +95 -0
- package/ios/RNWhisper.h +5 -0
- package/ios/RNWhisper.mm +147 -0
- package/ios/RNWhisperAudioUtils.m +4 -0
- package/ios/RNWhisperContext.h +5 -0
- package/ios/RNWhisperContext.mm +22 -26
- package/ios/RNWhisperVadContext.h +29 -0
- package/ios/RNWhisperVadContext.mm +152 -0
- package/ios/rnwhisper.xcframework/Info.plist +74 -0
- package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml-alloc.h +76 -0
- package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml-backend-impl.h +255 -0
- package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml-backend.h +354 -0
- package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml-common.h +1861 -0
- package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml-cpp.h +39 -0
- package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml-cpu.h +143 -0
- package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml-impl.h +603 -0
- package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml-metal-impl.h +622 -0
- package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml-metal.h +66 -0
- package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml-opt.h +237 -0
- package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml-quants.h +100 -0
- package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml-threading.h +14 -0
- package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml.h +2221 -0
- package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/gguf.h +202 -0
- package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/rn-audioutils.h +14 -0
- package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/rn-whisper-log.h +11 -0
- package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/rn-whisper.h +52 -0
- package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/whisper-arch.h +197 -0
- package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/whisper.h +739 -0
- package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Info.plist +0 -0
- package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/ggml-whisper.metallib +0 -0
- package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/rnwhisper +0 -0
- package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-alloc.h +76 -0
- package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-backend-impl.h +255 -0
- package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-backend.h +354 -0
- package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-common.h +1861 -0
- package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-cpp.h +39 -0
- package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-cpu.h +143 -0
- package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-impl.h +603 -0
- package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-metal-impl.h +622 -0
- package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-metal.h +66 -0
- package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-opt.h +237 -0
- package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-quants.h +100 -0
- package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-threading.h +14 -0
- package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml.h +2221 -0
- package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/gguf.h +202 -0
- package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/rn-audioutils.h +14 -0
- package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/rn-whisper-log.h +11 -0
- package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/rn-whisper.h +52 -0
- package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/whisper-arch.h +197 -0
- package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/whisper.h +739 -0
- package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Info.plist +0 -0
- package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/_CodeSignature/CodeResources +101 -0
- package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/ggml-whisper-sim.metallib +0 -0
- package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/rnwhisper +0 -0
- package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml-alloc.h +76 -0
- package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml-backend-impl.h +255 -0
- package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml-backend.h +354 -0
- package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml-common.h +1861 -0
- package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml-cpp.h +39 -0
- package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml-cpu.h +143 -0
- package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml-impl.h +603 -0
- package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml-metal-impl.h +622 -0
- package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml-metal.h +66 -0
- package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml-opt.h +237 -0
- package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml-quants.h +100 -0
- package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml-threading.h +14 -0
- package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml.h +2221 -0
- package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/gguf.h +202 -0
- package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/rn-audioutils.h +14 -0
- package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/rn-whisper-log.h +11 -0
- package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/rn-whisper.h +52 -0
- package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/whisper-arch.h +197 -0
- package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/whisper.h +739 -0
- package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Info.plist +0 -0
- package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/ggml-whisper.metallib +0 -0
- package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/rnwhisper +0 -0
- package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-alloc.h +76 -0
- package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-backend-impl.h +255 -0
- package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-backend.h +354 -0
- package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-common.h +1861 -0
- package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-cpp.h +39 -0
- package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-cpu.h +143 -0
- package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-impl.h +603 -0
- package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-metal-impl.h +622 -0
- package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-metal.h +66 -0
- package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-opt.h +237 -0
- package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-quants.h +100 -0
- package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-threading.h +14 -0
- package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml.h +2221 -0
- package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/gguf.h +202 -0
- package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/rn-audioutils.h +14 -0
- package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/rn-whisper-log.h +11 -0
- package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/rn-whisper.h +52 -0
- package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/whisper-arch.h +197 -0
- package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/whisper.h +739 -0
- package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Info.plist +0 -0
- package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/_CodeSignature/CodeResources +101 -0
- package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/ggml-whisper-sim.metallib +0 -0
- package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/rnwhisper +0 -0
- package/jest/mock.js +24 -0
- package/lib/commonjs/NativeRNWhisper.js.map +1 -1
- package/lib/commonjs/index.js +111 -1
- package/lib/commonjs/index.js.map +1 -1
- package/lib/commonjs/version.json +1 -1
- package/lib/module/NativeRNWhisper.js.map +1 -1
- package/lib/module/index.js +112 -0
- package/lib/module/index.js.map +1 -1
- package/lib/module/version.json +1 -1
- package/lib/typescript/NativeRNWhisper.d.ts +35 -0
- package/lib/typescript/NativeRNWhisper.d.ts.map +1 -1
- package/lib/typescript/index.d.ts +39 -3
- package/lib/typescript/index.d.ts.map +1 -1
- package/package.json +10 -6
- package/src/NativeRNWhisper.ts +48 -0
- package/src/index.ts +132 -1
- package/src/version.json +1 -1
- package/whisper-rn.podspec +11 -18
- package/cpp/README.md +0 -4
- package/cpp/ggml-aarch64.c +0 -3209
- package/cpp/ggml-aarch64.h +0 -39
- package/cpp/ggml-cpu-impl.h +0 -614
package/cpp/whisper.h
CHANGED
|
@@ -2,6 +2,7 @@
|
|
|
2
2
|
#define WHISPER_H
|
|
3
3
|
|
|
4
4
|
#include "ggml.h"
|
|
5
|
+
#include "ggml-cpu.h"
|
|
5
6
|
|
|
6
7
|
#include <stddef.h>
|
|
7
8
|
#include <stdint.h>
|
|
@@ -189,6 +190,15 @@ extern "C" {
|
|
|
189
190
|
uint32_t value; // Unicode code point or rule ID
|
|
190
191
|
} whisper_grammar_element;
|
|
191
192
|
|
|
193
|
+
/*
 * Tunable parameters for Voice Activity Detection (VAD).
 * Field suffixes give the unit: `_ms` is milliseconds, `_s` is seconds.
 */
typedef struct whisper_vad_params {
    /* Probability threshold to consider as speech. */
    float threshold;

    /* Min duration for a valid speech segment. */
    int min_speech_duration_ms;

    /* Min silence duration to consider speech as ended. */
    int min_silence_duration_ms;

    /* Max duration of a speech segment before forcing a new segment. */
    float max_speech_duration_s;

    /* Padding added before and after speech segments. */
    int speech_pad_ms;

    /* Overlap in seconds when copying audio samples from speech segment. */
    float samples_overlap;
} whisper_vad_params;
|
|
201
|
+
|
|
192
202
|
// Various functions for loading a ggml whisper model.
|
|
193
203
|
// Allocate (almost) all memory needed for the model.
|
|
194
204
|
// Return NULL on failure
|
|
@@ -425,21 +435,11 @@ extern "C" {
|
|
|
425
435
|
|
|
426
436
|
// Performance information from the default state.
|
|
427
437
|
struct whisper_timings {
|
|
428
|
-
|
|
429
|
-
|
|
430
|
-
|
|
431
|
-
|
|
432
|
-
|
|
433
|
-
int32_t n_sample;
|
|
434
|
-
int32_t n_encode;
|
|
435
|
-
int32_t n_decode;
|
|
436
|
-
int32_t n_batchd;
|
|
437
|
-
int32_t n_prompt;
|
|
438
|
-
int64_t t_sample_us;
|
|
439
|
-
int64_t t_encode_us;
|
|
440
|
-
int64_t t_decode_us;
|
|
441
|
-
int64_t t_batchd_us;
|
|
442
|
-
int64_t t_prompt_us;
|
|
438
|
+
float sample_ms;
|
|
439
|
+
float encode_ms;
|
|
440
|
+
float decode_ms;
|
|
441
|
+
float batchd_ms;
|
|
442
|
+
float prompt_ms;
|
|
443
443
|
};
|
|
444
444
|
WHISPER_API struct whisper_timings * whisper_get_timings(struct whisper_context * ctx);
|
|
445
445
|
WHISPER_API void whisper_print_timings(struct whisper_context * ctx);
|
|
@@ -532,8 +532,8 @@ extern "C" {
|
|
|
532
532
|
bool detect_language;
|
|
533
533
|
|
|
534
534
|
// common decoding parameters:
|
|
535
|
-
bool suppress_blank;
|
|
536
|
-
bool
|
|
535
|
+
bool suppress_blank; // ref: https://github.com/openai/whisper/blob/f82bc59f5ea234d4b97fb2860842ed38519f7e65/whisper/decoding.py#L89
|
|
536
|
+
bool suppress_nst; // non-speech tokens, ref: https://github.com/openai/whisper/blob/7858aa9c08d98f75575035ecd6481f462d66ca27/whisper/tokenizer.py#L224-L253
|
|
537
537
|
|
|
538
538
|
float temperature; // initial decoding temperature, ref: https://ai.stackexchange.com/a/32478
|
|
539
539
|
float max_initial_ts; // ref: https://github.com/openai/whisper/blob/f82bc59f5ea234d4b97fb2860842ed38519f7e65/whisper/decoding.py#L97
|
|
@@ -544,7 +544,7 @@ extern "C" {
|
|
|
544
544
|
float temperature_inc;
|
|
545
545
|
float entropy_thold; // similar to OpenAI's "compression_ratio_threshold"
|
|
546
546
|
float logprob_thold;
|
|
547
|
-
float no_speech_thold;
|
|
547
|
+
float no_speech_thold;
|
|
548
548
|
|
|
549
549
|
struct {
|
|
550
550
|
int best_of; // ref: https://github.com/openai/whisper/blob/f82bc59f5ea234d4b97fb2860842ed38519f7e65/whisper/transcribe.py#L264
|
|
@@ -580,11 +580,18 @@ extern "C" {
|
|
|
580
580
|
size_t n_grammar_rules;
|
|
581
581
|
size_t i_start_rule;
|
|
582
582
|
float grammar_penalty;
|
|
583
|
+
|
|
584
|
+
// Voice Activity Detection (VAD) params
|
|
585
|
+
bool vad; // Enable VAD
|
|
586
|
+
const char * vad_model_path; // Path to VAD model
|
|
587
|
+
|
|
588
|
+
whisper_vad_params vad_params;
|
|
583
589
|
};
|
|
584
590
|
|
|
585
591
|
// NOTE: this function allocates memory, and it is the responsibility of the caller to free the pointer - see whisper_free_context_params & whisper_free_params()
|
|
586
592
|
WHISPER_API struct whisper_context_params * whisper_context_default_params_by_ref(void);
|
|
587
593
|
WHISPER_API struct whisper_context_params whisper_context_default_params (void);
|
|
594
|
+
|
|
588
595
|
WHISPER_API struct whisper_full_params * whisper_full_default_params_by_ref(enum whisper_sampling_strategy strategy);
|
|
589
596
|
WHISPER_API struct whisper_full_params whisper_full_default_params (enum whisper_sampling_strategy strategy);
|
|
590
597
|
|
|
@@ -662,6 +669,53 @@ extern "C" {
|
|
|
662
669
|
WHISPER_API float whisper_full_get_token_p (struct whisper_context * ctx, int i_segment, int i_token);
|
|
663
670
|
WHISPER_API float whisper_full_get_token_p_from_state(struct whisper_state * state, int i_segment, int i_token);
|
|
664
671
|
|
|
672
|
+
//
|
|
673
|
+
// Voice Activity Detection (VAD)
|
|
674
|
+
//
|
|
675
|
+
|
|
676
|
+
struct whisper_vad_context;
|
|
677
|
+
|
|
678
|
+
WHISPER_API struct whisper_vad_params whisper_vad_default_params(void);
|
|
679
|
+
|
|
680
|
+
/* Options supplied when creating a whisper_vad_context. */
struct whisper_vad_context_params {
    /* The number of threads to use for processing. */
    int n_threads;

    /* NOTE(review): presumably enables GPU execution for the VAD model - confirm in whisper.cpp. */
    bool use_gpu;

    /* CUDA device */
    int gpu_device;
};
|
|
685
|
+
|
|
686
|
+
WHISPER_API struct whisper_vad_context_params whisper_vad_default_context_params(void);
|
|
687
|
+
|
|
688
|
+
WHISPER_API struct whisper_vad_context * whisper_vad_init_from_file_with_params(const char * path_model, struct whisper_vad_context_params params);
|
|
689
|
+
WHISPER_API struct whisper_vad_context * whisper_vad_init_with_params (struct whisper_model_loader * loader, struct whisper_vad_context_params params);
|
|
690
|
+
|
|
691
|
+
WHISPER_API bool whisper_vad_detect_speech(
|
|
692
|
+
struct whisper_vad_context * vctx,
|
|
693
|
+
const float * samples,
|
|
694
|
+
int n_samples);
|
|
695
|
+
|
|
696
|
+
WHISPER_API int whisper_vad_n_probs(struct whisper_vad_context * vctx);
|
|
697
|
+
WHISPER_API float * whisper_vad_probs (struct whisper_vad_context * vctx);
|
|
698
|
+
|
|
699
|
+
struct whisper_vad_segments;
|
|
700
|
+
|
|
701
|
+
WHISPER_API struct whisper_vad_segments * whisper_vad_segments_from_probs(
|
|
702
|
+
struct whisper_vad_context * vctx,
|
|
703
|
+
struct whisper_vad_params params);
|
|
704
|
+
|
|
705
|
+
WHISPER_API struct whisper_vad_segments * whisper_vad_segments_from_samples(
|
|
706
|
+
struct whisper_vad_context * vctx,
|
|
707
|
+
struct whisper_vad_params params,
|
|
708
|
+
const float * samples,
|
|
709
|
+
int n_samples);
|
|
710
|
+
|
|
711
|
+
WHISPER_API int whisper_vad_segments_n_segments(struct whisper_vad_segments * segments);
|
|
712
|
+
|
|
713
|
+
WHISPER_API float whisper_vad_segments_get_segment_t0(struct whisper_vad_segments * segments, int i_segment);
|
|
714
|
+
WHISPER_API float whisper_vad_segments_get_segment_t1(struct whisper_vad_segments * segments, int i_segment);
|
|
715
|
+
|
|
716
|
+
WHISPER_API void whisper_vad_free_segments(struct whisper_vad_segments * segments);
|
|
717
|
+
WHISPER_API void whisper_vad_free (struct whisper_vad_context * ctx);
|
|
718
|
+
|
|
665
719
|
////////////////////////////////////////////////////////////////////////////
|
|
666
720
|
|
|
667
721
|
// Temporary helpers needed for exposing ggml interface
|
|
@@ -675,6 +729,9 @@ extern "C" {
|
|
|
675
729
|
|
|
676
730
|
WHISPER_API void whisper_log_set(wsp_ggml_log_callback log_callback, void * user_data);
|
|
677
731
|
|
|
732
|
+
// Get the no_speech probability for the specified segment
|
|
733
|
+
WHISPER_API float whisper_full_get_segment_no_speech_prob (struct whisper_context * ctx, int i_segment);
|
|
734
|
+
WHISPER_API float whisper_full_get_segment_no_speech_prob_from_state(struct whisper_state * state, int i_segment);
|
|
678
735
|
#ifdef __cplusplus
|
|
679
736
|
}
|
|
680
737
|
#endif
|
|
@@ -0,0 +1,95 @@
|
|
|
1
|
+
# Builds the `rnwhisper` Apple framework (shared library) from the sources in ../cpp.
cmake_minimum_required(VERSION 3.16)
project(rnwhisper VERSION 1.0.0 LANGUAGES CXX C)

set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD_REQUIRED ON)

# iOS specific settings
set(CMAKE_OSX_DEPLOYMENT_TARGET 13.0)
set(CMAKE_XCODE_ATTRIBUTE_ENABLE_BITCODE NO)

# Dependencies and compile options
# NOTE(review): -DO3 only defines a preprocessor macro named `O3`; it does not
# select the -O3 optimisation level. Kept as-is to preserve the existing build.
add_definitions(
    -DNDEBUG
    -DO3
    -DWSP_GGML_USE_CPU
    -DWSP_GGML_USE_ACCELERATE
    -DWSP_GGML_USE_METAL
    -DWSP_GGML_METAL_USE_BF16
)

set(SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/../cpp)

# Architecture-specific kernels are only added for a pure arm64 build.
# Every other architecture selection (universal "arm64;x86_64" in any order,
# x86_64 alone, or an unset CMAKE_OSX_ARCHITECTURES) compiles no arch/ sources,
# so it is built with WSP_GGML_CPU_GENERIC instead of being left with neither.
# NOTE(review): assumes WSP_GGML_CPU_GENERIC selects the portable kernels
# (see ggml-cpu/arch-fallback.h) - confirm.
if (CMAKE_OSX_ARCHITECTURES STREQUAL "arm64")
    set(SOURCE_FILES_ARCH
        ${SOURCE_DIR}/ggml-cpu/arch/arm/quants.c
        ${SOURCE_DIR}/ggml-cpu/arch/arm/repack.cpp
    )
else ()
    set(SOURCE_FILES_ARCH "")
    add_definitions(-DWSP_GGML_CPU_GENERIC)
endif ()

# Define public headers
set(PUBLIC_HEADERS
    ${SOURCE_DIR}/rn-whisper.h
    ${SOURCE_DIR}/whisper.h
    ${SOURCE_DIR}/ggml.h
)

# Create library target
add_library(rnwhisper SHARED
    ${SOURCE_DIR}/ggml.c
    ${SOURCE_DIR}/ggml-alloc.c
    ${SOURCE_DIR}/ggml-backend.cpp
    ${SOURCE_DIR}/ggml-backend-reg.cpp
    ${SOURCE_DIR}/ggml-cpu/amx/amx.cpp
    ${SOURCE_DIR}/ggml-cpu/amx/mmq.cpp
    ${SOURCE_DIR}/ggml-cpu/ggml-cpu.c
    ${SOURCE_DIR}/ggml-cpu/ggml-cpu.cpp
    ${SOURCE_DIR}/ggml-cpu/quants.c
    ${SOURCE_DIR}/ggml-cpu/traits.cpp
    ${SOURCE_DIR}/ggml-cpu/repack.cpp
    ${SOURCE_DIR}/ggml-cpu/unary-ops.cpp
    ${SOURCE_DIR}/ggml-cpu/binary-ops.cpp
    ${SOURCE_DIR}/ggml-cpu/vec.cpp
    ${SOURCE_DIR}/ggml-cpu/ops.cpp
    ${SOURCE_DIR}/ggml-metal.m
    ${SOURCE_DIR}/ggml-opt.cpp
    ${SOURCE_DIR}/ggml-threading.cpp
    ${SOURCE_DIR}/ggml-quants.c
    ${SOURCE_DIR}/gguf.cpp
    ${SOURCE_DIR}/whisper.cpp
    ${SOURCE_DIR}/rn-whisper.cpp
    ${SOURCE_DIR}/rn-audioutils.cpp
    ${SOURCE_FILES_ARCH}
)

# Setup include directories
target_include_directories(rnwhisper
    PUBLIC
        $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/../cpp>
        $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/../cpp/ggml-cpu>
        $<INSTALL_INTERFACE:include>
)

# Link required frameworks
target_link_libraries(rnwhisper PRIVATE
    "-framework Accelerate"
    "-framework Foundation"
    "-framework Metal"
    "-framework MetalKit"
)

# Set properties for framework
set_target_properties(rnwhisper PROPERTIES
    MACOSX_FRAMEWORK_IDENTIFIER "com.rnwhisper"
    MACOSX_FRAMEWORK_BUNDLE_VERSION 1.0.0
    MACOSX_FRAMEWORK_SHORT_VERSION_STRING 1.0.0
    FRAMEWORK TRUE
    FRAMEWORK_VERSION 1.0.0
    VERSION 1.0.0
    PUBLIC_HEADER "${PUBLIC_HEADERS}"
    XCODE_ATTRIBUTE_CLANG_ENABLE_OBJC_ARC NO
)
|
package/ios/RNWhisper.h
CHANGED
package/ios/RNWhisper.mm
CHANGED
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
#import "RNWhisper.h"
|
|
2
2
|
#import "RNWhisperContext.h"
|
|
3
|
+
#import "RNWhisperVadContext.h"
|
|
3
4
|
#import "RNWhisperDownloader.h"
|
|
4
5
|
#import "RNWhisperAudioUtils.h"
|
|
5
6
|
#import "RNWhisperAudioSessionUtils.h"
|
|
@@ -13,6 +14,7 @@
|
|
|
13
14
|
@implementation RNWhisper
|
|
14
15
|
|
|
15
16
|
NSMutableDictionary *contexts;
|
|
17
|
+
NSMutableDictionary *vadContexts;
|
|
16
18
|
|
|
17
19
|
RCT_EXPORT_MODULE()
|
|
18
20
|
|
|
@@ -366,6 +368,15 @@ RCT_REMAP_METHOD(releaseAllContexts,
|
|
|
366
368
|
[context invalidate];
|
|
367
369
|
}
|
|
368
370
|
|
|
371
|
+
if (vadContexts != nil) {
|
|
372
|
+
for (NSNumber *contextId in vadContexts) {
|
|
373
|
+
RNWhisperVadContext *vadContext = vadContexts[contextId];
|
|
374
|
+
[vadContext invalidate];
|
|
375
|
+
}
|
|
376
|
+
[vadContexts removeAllObjects];
|
|
377
|
+
vadContexts = nil;
|
|
378
|
+
}
|
|
379
|
+
|
|
369
380
|
rnwhisper::job_abort_all(); // graceful abort
|
|
370
381
|
|
|
371
382
|
[contexts removeAllObjects];
|
|
@@ -437,6 +448,142 @@ RCT_REMAP_METHOD(setAudioSessionActive,
|
|
|
437
448
|
resolve(nil);
|
|
438
449
|
}
|
|
439
450
|
|
|
451
|
+
// Creates a VAD context from `vadOptions` and registers it in `vadContexts`.
//
// Options read: `filePath` (local path, http(s) URL, or bundle resource name),
// `isBundleAsset`, `useGpu`, `nThreads`.
// Resolves with `{ contextId, gpu, reasonNoGPU }`; rejects with
// `whisper_vad_error` when the model cannot be located or loaded.
RCT_REMAP_METHOD(initVadContext,
                 withVadOptions:(NSDictionary *)vadOptions
                 withResolver:(RCTPromiseResolveBlock)resolve
                 withRejecter:(RCTPromiseRejectBlock)reject)
{
    if (vadContexts == nil) {
        vadContexts = [[NSMutableDictionary alloc] init];
    }

    NSString *modelPath = vadOptions[@"filePath"];
    BOOL isBundleAsset = [vadOptions[@"isBundleAsset"] boolValue];
    BOOL useGpu = [vadOptions[@"useGpu"] boolValue];
    NSNumber *nThreads = vadOptions[@"nThreads"];

    NSString *path = modelPath;
    if ([path hasPrefix:@"http://"] || [path hasPrefix:@"https://"]) {
        path = [RNWhisperDownloader downloadFile:path toFile:nil];
    }
    if (isBundleAsset) {
        path = [[NSBundle mainBundle] pathForResource:modelPath ofType:nil];
    }

    // A nil path (missing `filePath`, failed download, or unknown bundle
    // resource) cannot name a model file; fail here instead of handing nil
    // to the native loader.
    if (path == nil) {
        reject(@"whisper_vad_error", @"Failed to load the VAD model", nil);
        return;
    }

    // Draw ids until one is unused, so that an existing context is never
    // silently replaced (and dropped without `invalidate`) on a collision.
    int contextId;
    do {
        contextId = (int)arc4random_uniform(1000000);
    } while (vadContexts[@(contextId)] != nil);

    RNWhisperVadContext *vadContext = [RNWhisperVadContext
        initWithModelPath:path
        contextId:contextId
        noMetal:!useGpu
        nThreads:nThreads
    ];
    if ([vadContext getVadContext] == NULL) {
        reject(@"whisper_vad_error", @"Failed to load the VAD model", nil);
        return;
    }

    vadContexts[@(contextId)] = vadContext;

    // Dictionary literals throw on nil values, so fall back to an empty string.
    NSString *reasonNoGPU = [vadContext reasonNoMetal];
    resolve(@{
        @"contextId": @(contextId),
        @"gpu": @([vadContext isMetalEnabled]),
        @"reasonNoGPU": reasonNoGPU ?: @"",
    });
}
|
|
494
|
+
|
|
495
|
+
// Runs speech detection on base64-encoded audio with a registered VAD context.
// Rejects with `whisper_vad_error` when the context id is unknown or the
// payload is not valid base64; otherwise resolves with the result of
// `-detectSpeech:options:`.
RCT_REMAP_METHOD(vadDetectSpeech,
                 withContextId:(int)contextId
                 withAudioData:(NSString *)audioDataBase64
                 withOptions:(NSDictionary *)options
                 withResolver:(RCTPromiseResolveBlock)resolve
                 withRejecter:(RCTPromiseRejectBlock)reject)
{
    RNWhisperVadContext *target = vadContexts[@(contextId)];
    if (!target) {
        reject(@"whisper_vad_error", @"VAD context not found", nil);
        return;
    }

    // Decode base64 audio data
    NSData *decoded = [[NSData alloc] initWithBase64EncodedString:audioDataBase64 options:0];
    if (!decoded) {
        reject(@"whisper_vad_error", @"Invalid audio data", nil);
        return;
    }

    resolve([target detectSpeech:decoded options:options]);
}
|
|
519
|
+
|
|
520
|
+
// Runs speech detection on a WAV source with a registered VAD context.
//
// `filePath` may be an http(s) URL (downloaded first), a
// `data:audio/wav;base64,` URI, or a local file path.
// Rejects with `whisper_vad_error` when the context id is unknown or the
// audio cannot be decoded; otherwise resolves with the result of
// `-detectSpeech:options:`.
RCT_REMAP_METHOD(vadDetectSpeechFile,
                 withVadContextId:(int)contextId
                 withFilePath:(NSString *)filePath
                 withOptions:(NSDictionary *)options
                 withResolver:(RCTPromiseResolveBlock)resolve
                 withRejecter:(RCTPromiseRejectBlock)reject)
{
    RNWhisperVadContext *vadContext = vadContexts[@(contextId)];

    if (vadContext == nil) {
        reject(@"whisper_vad_error", @"VAD context not found", nil);
        return;
    }

    // Handle different input types like transcribeFile does
    float *data = nil;
    int count = 0;
    if ([filePath hasPrefix:@"http://"] || [filePath hasPrefix:@"https://"]) {
        NSString *path = [RNWhisperDownloader downloadFile:filePath toFile:nil];
        data = [RNWhisperAudioUtils decodeWaveFile:path count:&count];
    } else if ([filePath hasPrefix:@"data:audio/wav;base64,"]) {
        // 22 == length of the "data:audio/wav;base64," prefix.
        NSData *waveData = [[NSData alloc] initWithBase64EncodedString:[filePath substringFromIndex:22] options:0];
        data = [RNWhisperAudioUtils decodeWaveData:waveData count:&count cutHeader:YES];
    } else {
        data = [RNWhisperAudioUtils decodeWaveFile:filePath count:&count];
    }

    if (data == nil) {
        reject(@"whisper_vad_error", @"Failed to load or decode audio file", nil);
        return;
    }

    // Convert float32 data to NSData for VAD context (this copies the samples).
    NSData *audioData = [NSData dataWithBytes:data length:count * sizeof(float)];

    // The samples now live in `audioData`; release the decoder's buffer so it
    // does not leak on every call.
    // NOTE(review): assumes RNWhisperAudioUtils returns a malloc()-allocated
    // buffer owned by the caller - confirm against RNWhisperAudioUtils.m.
    free(data);
    data = nil;

    NSArray *segments = [vadContext detectSpeech:audioData options:options];
    resolve(segments);
}
|
|
558
|
+
|
|
559
|
+
// Invalidates the VAD context registered under `contextId` and removes it from
// the registry, then resolves with nil. Rejects with "whisper_vad_error" when no
// context is registered under that id.
RCT_REMAP_METHOD(releaseVadContext,
                 withVadContextId:(int)contextId
                 withResolver:(RCTPromiseResolveBlock)resolve
                 withRejecter:(RCTPromiseRejectBlock)reject)
{
    // Box the id once; the same key is used for lookup and removal.
    NSNumber *registryKey = @(contextId);
    RNWhisperVadContext *target = [vadContexts objectForKey:registryKey];

    if (!target) {
        reject(@"whisper_vad_error", @"VAD context not found", nil);
        return;
    }

    // Invalidate before dropping the registry's reference to the context.
    [target invalidate];
    [vadContexts removeObjectForKey:registryKey];
    resolve(nil);
}
|
|
573
|
+
|
|
574
|
+
// Invalidates every registered VAD context, empties the registry and resolves
// with nil. Resolves immediately when the registry has not been created.
// This method never rejects.
RCT_EXPORT_METHOD(releaseAllVadContexts:(RCTPromiseResolveBlock)resolve
                  withRejecter:(RCTPromiseRejectBlock)reject)
{
    if (vadContexts == nil) {
        resolve(nil);
        return;
    }

    for (NSNumber *registryKey in vadContexts) {
        RNWhisperVadContext *entry = [vadContexts objectForKey:registryKey];
        [entry invalidate];
    }

    // The registry is cleared only after the loop; it is not mutated while
    // being fast-enumerated.
    [vadContexts removeAllObjects];
    resolve(nil);
}
|
|
586
|
+
|
|
440
587
|
#ifdef RCT_NEW_ARCH_ENABLED
|
|
441
588
|
- (std::shared_ptr<facebook::react::TurboModule>)getTurboModule:
|
|
442
589
|
(const facebook::react::ObjCTurboModule::InitParams &)params
|
package/ios/RNWhisperContext.h
CHANGED
package/ios/RNWhisperContext.mm
CHANGED
|
@@ -19,8 +19,9 @@
|
|
|
19
19
|
cparams.use_gpu = !noMetal;
|
|
20
20
|
cparams.flash_attn = useFlashAttn;
|
|
21
21
|
|
|
22
|
-
// TODO:
|
|
22
|
+
// TODO: Expose dtw_token_timestamps and dtw_aheads_preset
|
|
23
23
|
cparams.dtw_token_timestamps = false;
|
|
24
|
+
// cparams.dtw_aheads_preset = WHISPER_AHEADS_BASE;
|
|
24
25
|
|
|
25
26
|
cparams.use_coreml = !noCoreML;
|
|
26
27
|
#ifndef WHISPER_USE_COREML
|
|
@@ -35,36 +36,30 @@
|
|
|
35
36
|
NSLog(@"[RNWhisper] ggml-metal is not enabled in this build, ignoring use_gpu option");
|
|
36
37
|
cparams.use_gpu = false;
|
|
37
38
|
}
|
|
39
|
+
reasonNoMetal = @"Metal is not enabled in this build";
|
|
38
40
|
#endif
|
|
39
41
|
|
|
40
42
|
#ifdef WSP_GGML_USE_METAL
|
|
41
43
|
if (cparams.use_gpu) {
|
|
42
|
-
#if TARGET_OS_SIMULATOR
|
|
43
|
-
NSLog(@"[RNWhisper] ggml-metal is not available in simulator, ignoring use_gpu option: %@", reasonNoMetal);
|
|
44
|
-
cparams.use_gpu = false;
|
|
45
|
-
#else // TARGET_OS_SIMULATOR
|
|
46
|
-
// Check ggml-metal availability
|
|
47
|
-
NSError * error = nil;
|
|
48
44
|
id<MTLDevice> device = MTLCreateSystemDefaultDevice();
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
error:&error
|
|
55
|
-
];
|
|
56
|
-
if (error) {
|
|
57
|
-
reasonNoMetal = [error localizedDescription];
|
|
58
|
-
} else {
|
|
59
|
-
id<MTLFunction> kernel = [library newFunctionWithName:@"test"];
|
|
60
|
-
id<MTLComputePipelineState> pipeline = [device newComputePipelineStateWithFunction:kernel error:&error];
|
|
61
|
-
if (pipeline == nil) {
|
|
62
|
-
reasonNoMetal = [error localizedDescription];
|
|
63
|
-
NSLog(@"[RNWhisper] ggml-metal is not available, ignoring use_gpu option: %@", reasonNoMetal);
|
|
64
|
-
cparams.use_gpu = false;
|
|
65
|
-
}
|
|
45
|
+
|
|
46
|
+
// Check ggml-metal availability
|
|
47
|
+
BOOL supportsGgmlMetal = [device supportsFamily:MTLGPUFamilyApple7];
|
|
48
|
+
if (@available(iOS 16.0, tvOS 16.0, *)) {
|
|
49
|
+
supportsGgmlMetal = supportsGgmlMetal && [device supportsFamily:MTLGPUFamilyMetal3];
|
|
66
50
|
}
|
|
67
|
-
|
|
51
|
+
if (!supportsGgmlMetal) {
|
|
52
|
+
cparams.use_gpu = false;
|
|
53
|
+
reasonNoMetal = @"Metal is not supported in this device";
|
|
54
|
+
}
|
|
55
|
+
|
|
56
|
+
#if TARGET_OS_SIMULATOR
|
|
57
|
+
// Use the backend, but no layers because not supported fully on simulator
|
|
58
|
+
cparams.use_gpu = false;
|
|
59
|
+
reasonNoMetal = @"Metal is not supported in simulator";
|
|
60
|
+
#endif
|
|
61
|
+
|
|
62
|
+
device = nil;
|
|
68
63
|
}
|
|
69
64
|
#endif // WSP_GGML_USE_METAL
|
|
70
65
|
|
|
@@ -431,6 +426,7 @@ struct rnwhisper_segments_callback_data {
|
|
|
431
426
|
self->recordState.job = job;
|
|
432
427
|
int code = [self fullTranscribe:job audioData:audioData audioDataCount:audioDataCount];
|
|
433
428
|
rnwhisper::job_remove(jobId);
|
|
429
|
+
self->recordState.job = nullptr;
|
|
434
430
|
self->recordState.isTranscribing = false;
|
|
435
431
|
onEnd(code);
|
|
436
432
|
});
|
|
@@ -445,7 +441,7 @@ struct rnwhisper_segments_callback_data {
|
|
|
445
441
|
}
|
|
446
442
|
|
|
447
443
|
- (void)stopTranscribe:(int)jobId {
|
|
448
|
-
if (self->recordState.job) self->recordState.job->abort();
|
|
444
|
+
if (self->recordState.job != nullptr) self->recordState.job->abort();
|
|
449
445
|
if (self->recordState.isRealtime && self->recordState.isCapturing) {
|
|
450
446
|
[self stopAudio];
|
|
451
447
|
if (!self->recordState.isTranscribing) {
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
#ifdef __cplusplus
|
|
2
|
+
#if RNWHISPER_BUILD_FROM_SOURCE
|
|
3
|
+
#import "whisper.h"
|
|
4
|
+
#import "rn-whisper.h"
|
|
5
|
+
#else
|
|
6
|
+
#import <rnwhisper/whisper.h>
|
|
7
|
+
#import <rnwhisper/rn-whisper.h>
|
|
8
|
+
#endif
|
|
9
|
+
#endif
|
|
10
|
+
|
|
11
|
+
#import <Foundation/Foundation.h>
|
|
12
|
+
|
|
13
|
+
/// Objective-C wrapper that holds a `struct whisper_vad_context` together with
/// the bookkeeping the React Native module keeps per VAD context.
@interface RNWhisperVadContext : NSObject {
    int contextId;                     // id passed to the factory method
    dispatch_queue_t dQueue;           // returned by -getDispatchQueue
    struct whisper_vad_context *vctx;  // returned by -getVadContext
    NSString *reasonNoMetal;           // presumably why Metal is not in use - confirm in the .mm
    bool isMetalEnabled;               // backing value for -isMetalEnabled
}

/// Factory method. Despite the `init` prefix this is a class method that returns an instance.
/// @param modelPath Path of the VAD model to load.
/// @param contextId Identifier to associate with the new context.
/// @param noMetal   Presumably disables the Metal backend when YES - confirm in the .mm.
/// @param nThreads  Thread-count setting; handling of nil is not visible from this header.
+ (instancetype)initWithModelPath:(NSString *)modelPath
                        contextId:(int)contextId
                          noMetal:(BOOL)noMetal
                         nThreads:(NSNumber *)nThreads;

/// Accessor for the Metal-enabled flag.
- (bool)isMetalEnabled;

/// Accessor for the Metal-unavailable reason string.
- (NSString *)reasonNoMetal;

/// Accessor for the underlying whisper VAD context pointer.
- (struct whisper_vad_context *)getVadContext;

/// Accessor for the dispatch queue held by this context.
- (dispatch_queue_t)getDispatchQueue;

/// Runs speech detection over `audioData` and returns the detected segments.
/// The module's file-based caller passes float32 samples packed into the NSData.
/// @param options Detection options dictionary, forwarded from the JS caller.
- (NSArray *)detectSpeech:(NSData *)audioData options:(NSDictionary *)options;

/// Called by the module before a context is removed from its registry.
- (void)invalidate;

@end
|