whisper.rn 0.4.0-rc.9 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +5 -1
- package/android/build.gradle +12 -3
- package/android/src/main/CMakeLists.txt +43 -13
- package/android/src/main/java/com/rnwhisper/WhisperContext.java +33 -35
- package/android/src/main/jni.cpp +9 -0
- package/android/src/main/jniLibs/arm64-v8a/librnwhisper.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/librnwhisper_v8fp16_va_2.so +0 -0
- package/android/src/main/jniLibs/armeabi-v7a/librnwhisper.so +0 -0
- package/android/src/main/jniLibs/armeabi-v7a/librnwhisper_vfpv4.so +0 -0
- package/android/src/main/jniLibs/x86_64/librnwhisper.so +0 -0
- package/android/src/main/jniLibs/x86_64/librnwhisper_x86_64.so +0 -0
- package/cpp/coreml/whisper-compat.h +10 -0
- package/cpp/coreml/whisper-compat.m +35 -0
- package/cpp/coreml/whisper-decoder-impl.h +27 -15
- package/cpp/coreml/whisper-decoder-impl.m +36 -10
- package/cpp/coreml/whisper-encoder-impl.h +21 -9
- package/cpp/coreml/whisper-encoder-impl.m +29 -3
- package/cpp/ggml-alloc.c +39 -37
- package/cpp/ggml-alloc.h +1 -1
- package/cpp/ggml-backend-impl.h +55 -27
- package/cpp/ggml-backend-reg.cpp +591 -0
- package/cpp/ggml-backend.cpp +336 -955
- package/cpp/ggml-backend.h +70 -42
- package/cpp/ggml-common.h +57 -49
- package/cpp/ggml-cpp.h +39 -0
- package/cpp/ggml-cpu/amx/amx.cpp +221 -0
- package/cpp/ggml-cpu/amx/amx.h +8 -0
- package/cpp/ggml-cpu/amx/common.h +91 -0
- package/cpp/ggml-cpu/amx/mmq.cpp +2511 -0
- package/cpp/ggml-cpu/amx/mmq.h +10 -0
- package/cpp/ggml-cpu/arch/arm/cpu-feats.cpp +94 -0
- package/cpp/ggml-cpu/arch/arm/quants.c +4113 -0
- package/cpp/ggml-cpu/arch/arm/repack.cpp +2162 -0
- package/cpp/ggml-cpu/arch/x86/cpu-feats.cpp +327 -0
- package/cpp/ggml-cpu/arch/x86/quants.c +4310 -0
- package/cpp/ggml-cpu/arch/x86/repack.cpp +3284 -0
- package/cpp/ggml-cpu/arch-fallback.h +184 -0
- package/cpp/ggml-cpu/binary-ops.cpp +158 -0
- package/cpp/ggml-cpu/binary-ops.h +16 -0
- package/cpp/ggml-cpu/common.h +72 -0
- package/cpp/ggml-cpu/ggml-cpu-impl.h +511 -0
- package/cpp/ggml-cpu/ggml-cpu.c +3473 -0
- package/cpp/ggml-cpu/ggml-cpu.cpp +671 -0
- package/cpp/ggml-cpu/ops.cpp +9085 -0
- package/cpp/ggml-cpu/ops.h +111 -0
- package/cpp/ggml-cpu/quants.c +1157 -0
- package/cpp/ggml-cpu/quants.h +89 -0
- package/cpp/ggml-cpu/repack.cpp +1570 -0
- package/cpp/ggml-cpu/repack.h +98 -0
- package/cpp/ggml-cpu/simd-mappings.h +1006 -0
- package/cpp/ggml-cpu/traits.cpp +36 -0
- package/cpp/ggml-cpu/traits.h +38 -0
- package/cpp/ggml-cpu/unary-ops.cpp +186 -0
- package/cpp/ggml-cpu/unary-ops.h +28 -0
- package/cpp/ggml-cpu/vec.cpp +321 -0
- package/cpp/ggml-cpu/vec.h +973 -0
- package/cpp/ggml-cpu.h +143 -0
- package/cpp/ggml-impl.h +417 -23
- package/cpp/ggml-metal-impl.h +622 -0
- package/cpp/ggml-metal.h +9 -9
- package/cpp/ggml-metal.m +3451 -1344
- package/cpp/ggml-opt.cpp +1037 -0
- package/cpp/ggml-opt.h +237 -0
- package/cpp/ggml-quants.c +296 -10818
- package/cpp/ggml-quants.h +78 -125
- package/cpp/ggml-threading.cpp +12 -0
- package/cpp/ggml-threading.h +14 -0
- package/cpp/ggml-whisper-sim.metallib +0 -0
- package/cpp/ggml-whisper.metallib +0 -0
- package/cpp/ggml.c +4633 -21450
- package/cpp/ggml.h +320 -661
- package/cpp/gguf.cpp +1347 -0
- package/cpp/gguf.h +202 -0
- package/cpp/rn-whisper.cpp +4 -11
- package/cpp/whisper-arch.h +197 -0
- package/cpp/whisper.cpp +2022 -495
- package/cpp/whisper.h +75 -18
- package/ios/CMakeLists.txt +95 -0
- package/ios/RNWhisper.h +5 -0
- package/ios/RNWhisperAudioUtils.m +4 -0
- package/ios/RNWhisperContext.h +5 -0
- package/ios/RNWhisperContext.mm +4 -2
- package/ios/rnwhisper.xcframework/Info.plist +74 -0
- package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml-alloc.h +76 -0
- package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml-backend-impl.h +255 -0
- package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml-backend.h +354 -0
- package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml-common.h +1861 -0
- package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml-cpp.h +39 -0
- package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml-cpu.h +143 -0
- package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml-impl.h +603 -0
- package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml-metal-impl.h +622 -0
- package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml-metal.h +66 -0
- package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml-opt.h +237 -0
- package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml-quants.h +100 -0
- package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml-threading.h +14 -0
- package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml.h +2221 -0
- package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/gguf.h +202 -0
- package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/rn-audioutils.h +14 -0
- package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/rn-whisper-log.h +11 -0
- package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/rn-whisper.h +52 -0
- package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/whisper-arch.h +197 -0
- package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/whisper.h +739 -0
- package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Info.plist +0 -0
- package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/ggml-whisper.metallib +0 -0
- package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/rnwhisper +0 -0
- package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-alloc.h +76 -0
- package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-backend-impl.h +255 -0
- package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-backend.h +354 -0
- package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-common.h +1861 -0
- package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-cpp.h +39 -0
- package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-cpu.h +143 -0
- package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-impl.h +603 -0
- package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-metal-impl.h +622 -0
- package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-metal.h +66 -0
- package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-opt.h +237 -0
- package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-quants.h +100 -0
- package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-threading.h +14 -0
- package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml.h +2221 -0
- package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/gguf.h +202 -0
- package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/rn-audioutils.h +14 -0
- package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/rn-whisper-log.h +11 -0
- package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/rn-whisper.h +52 -0
- package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/whisper-arch.h +197 -0
- package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/whisper.h +739 -0
- package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Info.plist +0 -0
- package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/_CodeSignature/CodeResources +101 -0
- package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/ggml-whisper-sim.metallib +0 -0
- package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/rnwhisper +0 -0
- package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml-alloc.h +76 -0
- package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml-backend-impl.h +255 -0
- package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml-backend.h +354 -0
- package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml-common.h +1861 -0
- package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml-cpp.h +39 -0
- package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml-cpu.h +143 -0
- package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml-impl.h +603 -0
- package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml-metal-impl.h +622 -0
- package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml-metal.h +66 -0
- package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml-opt.h +237 -0
- package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml-quants.h +100 -0
- package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml-threading.h +14 -0
- package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml.h +2221 -0
- package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/gguf.h +202 -0
- package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/rn-audioutils.h +14 -0
- package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/rn-whisper-log.h +11 -0
- package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/rn-whisper.h +52 -0
- package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/whisper-arch.h +197 -0
- package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/whisper.h +739 -0
- package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Info.plist +0 -0
- package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/ggml-whisper.metallib +0 -0
- package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/rnwhisper +0 -0
- package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-alloc.h +76 -0
- package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-backend-impl.h +255 -0
- package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-backend.h +354 -0
- package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-common.h +1861 -0
- package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-cpp.h +39 -0
- package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-cpu.h +143 -0
- package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-impl.h +603 -0
- package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-metal-impl.h +622 -0
- package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-metal.h +66 -0
- package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-opt.h +237 -0
- package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-quants.h +100 -0
- package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-threading.h +14 -0
- package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml.h +2221 -0
- package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/gguf.h +202 -0
- package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/rn-audioutils.h +14 -0
- package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/rn-whisper-log.h +11 -0
- package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/rn-whisper.h +52 -0
- package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/whisper-arch.h +197 -0
- package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/whisper.h +739 -0
- package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Info.plist +0 -0
- package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/_CodeSignature/CodeResources +101 -0
- package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/ggml-whisper-sim.metallib +0 -0
- package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/rnwhisper +0 -0
- package/jest/mock.js +5 -0
- package/lib/commonjs/version.json +1 -1
- package/lib/module/version.json +1 -1
- package/package.json +10 -6
- package/src/version.json +1 -1
- package/whisper-rn.podspec +11 -18
- package/cpp/README.md +0 -4
- package/cpp/ggml-aarch64.c +0 -3209
- package/cpp/ggml-aarch64.h +0 -39
- package/cpp/ggml-cpu-impl.h +0 -614
package/cpp/whisper.h
CHANGED
|
@@ -2,6 +2,7 @@
|
|
|
2
2
|
#define WHISPER_H
|
|
3
3
|
|
|
4
4
|
#include "ggml.h"
|
|
5
|
+
#include "ggml-cpu.h"
|
|
5
6
|
|
|
6
7
|
#include <stddef.h>
|
|
7
8
|
#include <stdint.h>
|
|
@@ -189,6 +190,15 @@ extern "C" {
|
|
|
189
190
|
uint32_t value; // Unicode code point or rule ID
|
|
190
191
|
} whisper_grammar_element;
|
|
191
192
|
|
|
193
|
+
typedef struct whisper_vad_params {
|
|
194
|
+
float threshold; // Probability threshold to consider as speech.
|
|
195
|
+
int min_speech_duration_ms; // Min duration for a valid speech segment.
|
|
196
|
+
int min_silence_duration_ms; // Min silence duration to consider speech as ended.
|
|
197
|
+
float max_speech_duration_s; // Max duration of a speech segment before forcing a new segment.
|
|
198
|
+
int speech_pad_ms; // Padding added before and after speech segments.
|
|
199
|
+
float samples_overlap; // Overlap in seconds when copying audio samples from speech segment.
|
|
200
|
+
} whisper_vad_params;
|
|
201
|
+
|
|
192
202
|
// Various functions for loading a ggml whisper model.
|
|
193
203
|
// Allocate (almost) all memory needed for the model.
|
|
194
204
|
// Return NULL on failure
|
|
@@ -425,21 +435,11 @@ extern "C" {
|
|
|
425
435
|
|
|
426
436
|
// Performance information from the default state.
|
|
427
437
|
struct whisper_timings {
|
|
428
|
-
|
|
429
|
-
|
|
430
|
-
|
|
431
|
-
|
|
432
|
-
|
|
433
|
-
int32_t n_sample;
|
|
434
|
-
int32_t n_encode;
|
|
435
|
-
int32_t n_decode;
|
|
436
|
-
int32_t n_batchd;
|
|
437
|
-
int32_t n_prompt;
|
|
438
|
-
int64_t t_sample_us;
|
|
439
|
-
int64_t t_encode_us;
|
|
440
|
-
int64_t t_decode_us;
|
|
441
|
-
int64_t t_batchd_us;
|
|
442
|
-
int64_t t_prompt_us;
|
|
438
|
+
float sample_ms;
|
|
439
|
+
float encode_ms;
|
|
440
|
+
float decode_ms;
|
|
441
|
+
float batchd_ms;
|
|
442
|
+
float prompt_ms;
|
|
443
443
|
};
|
|
444
444
|
WHISPER_API struct whisper_timings * whisper_get_timings(struct whisper_context * ctx);
|
|
445
445
|
WHISPER_API void whisper_print_timings(struct whisper_context * ctx);
|
|
@@ -532,8 +532,8 @@ extern "C" {
|
|
|
532
532
|
bool detect_language;
|
|
533
533
|
|
|
534
534
|
// common decoding parameters:
|
|
535
|
-
bool suppress_blank;
|
|
536
|
-
bool
|
|
535
|
+
bool suppress_blank; // ref: https://github.com/openai/whisper/blob/f82bc59f5ea234d4b97fb2860842ed38519f7e65/whisper/decoding.py#L89
|
|
536
|
+
bool suppress_nst; // non-speech tokens, ref: https://github.com/openai/whisper/blob/7858aa9c08d98f75575035ecd6481f462d66ca27/whisper/tokenizer.py#L224-L253
|
|
537
537
|
|
|
538
538
|
float temperature; // initial decoding temperature, ref: https://ai.stackexchange.com/a/32478
|
|
539
539
|
float max_initial_ts; // ref: https://github.com/openai/whisper/blob/f82bc59f5ea234d4b97fb2860842ed38519f7e65/whisper/decoding.py#L97
|
|
@@ -544,7 +544,7 @@ extern "C" {
|
|
|
544
544
|
float temperature_inc;
|
|
545
545
|
float entropy_thold; // similar to OpenAI's "compression_ratio_threshold"
|
|
546
546
|
float logprob_thold;
|
|
547
|
-
float no_speech_thold;
|
|
547
|
+
float no_speech_thold;
|
|
548
548
|
|
|
549
549
|
struct {
|
|
550
550
|
int best_of; // ref: https://github.com/openai/whisper/blob/f82bc59f5ea234d4b97fb2860842ed38519f7e65/whisper/transcribe.py#L264
|
|
@@ -580,11 +580,18 @@ extern "C" {
|
|
|
580
580
|
size_t n_grammar_rules;
|
|
581
581
|
size_t i_start_rule;
|
|
582
582
|
float grammar_penalty;
|
|
583
|
+
|
|
584
|
+
// Voice Activity Detection (VAD) params
|
|
585
|
+
bool vad; // Enable VAD
|
|
586
|
+
const char * vad_model_path; // Path to VAD model
|
|
587
|
+
|
|
588
|
+
whisper_vad_params vad_params;
|
|
583
589
|
};
|
|
584
590
|
|
|
585
591
|
// NOTE: this function allocates memory, and it is the responsibility of the caller to free the pointer - see whisper_free_context_params & whisper_free_params()
|
|
586
592
|
WHISPER_API struct whisper_context_params * whisper_context_default_params_by_ref(void);
|
|
587
593
|
WHISPER_API struct whisper_context_params whisper_context_default_params (void);
|
|
594
|
+
|
|
588
595
|
WHISPER_API struct whisper_full_params * whisper_full_default_params_by_ref(enum whisper_sampling_strategy strategy);
|
|
589
596
|
WHISPER_API struct whisper_full_params whisper_full_default_params (enum whisper_sampling_strategy strategy);
|
|
590
597
|
|
|
@@ -662,6 +669,53 @@ extern "C" {
|
|
|
662
669
|
WHISPER_API float whisper_full_get_token_p (struct whisper_context * ctx, int i_segment, int i_token);
|
|
663
670
|
WHISPER_API float whisper_full_get_token_p_from_state(struct whisper_state * state, int i_segment, int i_token);
|
|
664
671
|
|
|
672
|
+
//
|
|
673
|
+
// Voice Activity Detection (VAD)
|
|
674
|
+
//
|
|
675
|
+
|
|
676
|
+
struct whisper_vad_context;
|
|
677
|
+
|
|
678
|
+
WHISPER_API struct whisper_vad_params whisper_vad_default_params(void);
|
|
679
|
+
|
|
680
|
+
struct whisper_vad_context_params {
|
|
681
|
+
int n_threads; // The number of threads to use for processing.
|
|
682
|
+
bool use_gpu;
|
|
683
|
+
int gpu_device; // CUDA device
|
|
684
|
+
};
|
|
685
|
+
|
|
686
|
+
WHISPER_API struct whisper_vad_context_params whisper_vad_default_context_params(void);
|
|
687
|
+
|
|
688
|
+
WHISPER_API struct whisper_vad_context * whisper_vad_init_from_file_with_params(const char * path_model, struct whisper_vad_context_params params);
|
|
689
|
+
WHISPER_API struct whisper_vad_context * whisper_vad_init_with_params (struct whisper_model_loader * loader, struct whisper_vad_context_params params);
|
|
690
|
+
|
|
691
|
+
WHISPER_API bool whisper_vad_detect_speech(
|
|
692
|
+
struct whisper_vad_context * vctx,
|
|
693
|
+
const float * samples,
|
|
694
|
+
int n_samples);
|
|
695
|
+
|
|
696
|
+
WHISPER_API int whisper_vad_n_probs(struct whisper_vad_context * vctx);
|
|
697
|
+
WHISPER_API float * whisper_vad_probs (struct whisper_vad_context * vctx);
|
|
698
|
+
|
|
699
|
+
struct whisper_vad_segments;
|
|
700
|
+
|
|
701
|
+
WHISPER_API struct whisper_vad_segments * whisper_vad_segments_from_probs(
|
|
702
|
+
struct whisper_vad_context * vctx,
|
|
703
|
+
struct whisper_vad_params params);
|
|
704
|
+
|
|
705
|
+
WHISPER_API struct whisper_vad_segments * whisper_vad_segments_from_samples(
|
|
706
|
+
struct whisper_vad_context * vctx,
|
|
707
|
+
struct whisper_vad_params params,
|
|
708
|
+
const float * samples,
|
|
709
|
+
int n_samples);
|
|
710
|
+
|
|
711
|
+
WHISPER_API int whisper_vad_segments_n_segments(struct whisper_vad_segments * segments);
|
|
712
|
+
|
|
713
|
+
WHISPER_API float whisper_vad_segments_get_segment_t0(struct whisper_vad_segments * segments, int i_segment);
|
|
714
|
+
WHISPER_API float whisper_vad_segments_get_segment_t1(struct whisper_vad_segments * segments, int i_segment);
|
|
715
|
+
|
|
716
|
+
WHISPER_API void whisper_vad_free_segments(struct whisper_vad_segments * segments);
|
|
717
|
+
WHISPER_API void whisper_vad_free (struct whisper_vad_context * ctx);
|
|
718
|
+
|
|
665
719
|
////////////////////////////////////////////////////////////////////////////
|
|
666
720
|
|
|
667
721
|
// Temporary helpers needed for exposing ggml interface
|
|
@@ -675,6 +729,9 @@ extern "C" {
|
|
|
675
729
|
|
|
676
730
|
WHISPER_API void whisper_log_set(wsp_ggml_log_callback log_callback, void * user_data);
|
|
677
731
|
|
|
732
|
+
// Get the no_speech probability for the specified segment
|
|
733
|
+
WHISPER_API float whisper_full_get_segment_no_speech_prob (struct whisper_context * ctx, int i_segment);
|
|
734
|
+
WHISPER_API float whisper_full_get_segment_no_speech_prob_from_state(struct whisper_state * state, int i_segment);
|
|
678
735
|
#ifdef __cplusplus
|
|
679
736
|
}
|
|
680
737
|
#endif
|
|
@@ -0,0 +1,95 @@
|
|
|
1
|
+
cmake_minimum_required(VERSION 3.16)
|
|
2
|
+
project(rnwhisper VERSION 1.0.0 LANGUAGES CXX C)
|
|
3
|
+
|
|
4
|
+
set(CMAKE_CXX_STANDARD 17)
|
|
5
|
+
set(CMAKE_CXX_STANDARD_REQUIRED ON)
|
|
6
|
+
|
|
7
|
+
# iOS specific settings
|
|
8
|
+
set(CMAKE_OSX_DEPLOYMENT_TARGET 13.0)
|
|
9
|
+
set(CMAKE_XCODE_ATTRIBUTE_ENABLE_BITCODE NO)
|
|
10
|
+
|
|
11
|
+
# Dependencies and compile options
|
|
12
|
+
add_definitions(
|
|
13
|
+
-DNDEBUG
|
|
14
|
+
-DO3
|
|
15
|
+
-DWSP_GGML_USE_CPU
|
|
16
|
+
-DWSP_GGML_USE_ACCELERATE
|
|
17
|
+
-DWSP_GGML_USE_METAL
|
|
18
|
+
-DWSP_GGML_METAL_USE_BF16
|
|
19
|
+
)
|
|
20
|
+
|
|
21
|
+
if (CMAKE_OSX_ARCHITECTURES STREQUAL "arm64;x86_64")
|
|
22
|
+
add_definitions(-DWSP_GGML_CPU_GENERIC)
|
|
23
|
+
endif ()
|
|
24
|
+
|
|
25
|
+
set(SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/../cpp)
|
|
26
|
+
|
|
27
|
+
if (CMAKE_OSX_ARCHITECTURES STREQUAL "arm64")
|
|
28
|
+
set(SOURCE_FILES_ARCH
|
|
29
|
+
${SOURCE_DIR}/ggml-cpu/arch/arm/quants.c
|
|
30
|
+
${SOURCE_DIR}/ggml-cpu/arch/arm/repack.cpp
|
|
31
|
+
)
|
|
32
|
+
endif ()
|
|
33
|
+
|
|
34
|
+
# Define public headers
|
|
35
|
+
set(PUBLIC_HEADERS
|
|
36
|
+
${SOURCE_DIR}/rn-whisper.h
|
|
37
|
+
${SOURCE_DIR}/whisper.h
|
|
38
|
+
${SOURCE_DIR}/ggml.h
|
|
39
|
+
)
|
|
40
|
+
|
|
41
|
+
# Create library target
|
|
42
|
+
add_library(rnwhisper SHARED
|
|
43
|
+
${SOURCE_DIR}/ggml.c
|
|
44
|
+
${SOURCE_DIR}/ggml-alloc.c
|
|
45
|
+
${SOURCE_DIR}/ggml-backend.cpp
|
|
46
|
+
${SOURCE_DIR}/ggml-backend-reg.cpp
|
|
47
|
+
${SOURCE_DIR}/ggml-cpu/amx/amx.cpp
|
|
48
|
+
${SOURCE_DIR}/ggml-cpu/amx/mmq.cpp
|
|
49
|
+
${SOURCE_DIR}/ggml-cpu/ggml-cpu.c
|
|
50
|
+
${SOURCE_DIR}/ggml-cpu/ggml-cpu.cpp
|
|
51
|
+
${SOURCE_DIR}/ggml-cpu/quants.c
|
|
52
|
+
${SOURCE_DIR}/ggml-cpu/traits.cpp
|
|
53
|
+
${SOURCE_DIR}/ggml-cpu/repack.cpp
|
|
54
|
+
${SOURCE_DIR}/ggml-cpu/unary-ops.cpp
|
|
55
|
+
${SOURCE_DIR}/ggml-cpu/binary-ops.cpp
|
|
56
|
+
${SOURCE_DIR}/ggml-cpu/vec.cpp
|
|
57
|
+
${SOURCE_DIR}/ggml-cpu/ops.cpp
|
|
58
|
+
${SOURCE_DIR}/ggml-metal.m
|
|
59
|
+
${SOURCE_DIR}/ggml-opt.cpp
|
|
60
|
+
${SOURCE_DIR}/ggml-threading.cpp
|
|
61
|
+
${SOURCE_DIR}/ggml-quants.c
|
|
62
|
+
${SOURCE_DIR}/gguf.cpp
|
|
63
|
+
${SOURCE_DIR}/whisper.cpp
|
|
64
|
+
${SOURCE_DIR}/rn-whisper.cpp
|
|
65
|
+
${SOURCE_DIR}/rn-audioutils.cpp
|
|
66
|
+
${SOURCE_FILES_ARCH}
|
|
67
|
+
)
|
|
68
|
+
|
|
69
|
+
# Setup include directories
|
|
70
|
+
target_include_directories(rnwhisper
|
|
71
|
+
PUBLIC
|
|
72
|
+
$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/../cpp>
|
|
73
|
+
$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/../cpp/ggml-cpu>
|
|
74
|
+
$<INSTALL_INTERFACE:include>
|
|
75
|
+
)
|
|
76
|
+
|
|
77
|
+
# Link required frameworks
|
|
78
|
+
target_link_libraries(rnwhisper PRIVATE
|
|
79
|
+
"-framework Accelerate"
|
|
80
|
+
"-framework Foundation"
|
|
81
|
+
"-framework Metal"
|
|
82
|
+
"-framework MetalKit"
|
|
83
|
+
)
|
|
84
|
+
|
|
85
|
+
# Set properties for framework
|
|
86
|
+
set_target_properties(rnwhisper PROPERTIES
|
|
87
|
+
MACOSX_FRAMEWORK_IDENTIFIER "com.rnwhisper"
|
|
88
|
+
MACOSX_FRAMEWORK_BUNDLE_VERSION 1.0.0
|
|
89
|
+
MACOSX_FRAMEWORK_SHORT_VERSION_STRING 1.0.0
|
|
90
|
+
FRAMEWORK TRUE
|
|
91
|
+
FRAMEWORK_VERSION 1.0.0
|
|
92
|
+
VERSION 1.0.0
|
|
93
|
+
PUBLIC_HEADER "${PUBLIC_HEADERS}"
|
|
94
|
+
XCODE_ATTRIBUTE_CLANG_ENABLE_OBJC_ARC NO
|
|
95
|
+
)
|
package/ios/RNWhisper.h
CHANGED
package/ios/RNWhisperContext.h
CHANGED
package/ios/RNWhisperContext.mm
CHANGED
|
@@ -19,8 +19,9 @@
|
|
|
19
19
|
cparams.use_gpu = !noMetal;
|
|
20
20
|
cparams.flash_attn = useFlashAttn;
|
|
21
21
|
|
|
22
|
-
// TODO:
|
|
22
|
+
// TODO: Expose dtw_token_timestamps and dtw_aheads_preset
|
|
23
23
|
cparams.dtw_token_timestamps = false;
|
|
24
|
+
// cparams.dtw_aheads_preset = WHISPER_AHEADS_BASE;
|
|
24
25
|
|
|
25
26
|
cparams.use_coreml = !noCoreML;
|
|
26
27
|
#ifndef WHISPER_USE_COREML
|
|
@@ -431,6 +432,7 @@ struct rnwhisper_segments_callback_data {
|
|
|
431
432
|
self->recordState.job = job;
|
|
432
433
|
int code = [self fullTranscribe:job audioData:audioData audioDataCount:audioDataCount];
|
|
433
434
|
rnwhisper::job_remove(jobId);
|
|
435
|
+
self->recordState.job = nullptr;
|
|
434
436
|
self->recordState.isTranscribing = false;
|
|
435
437
|
onEnd(code);
|
|
436
438
|
});
|
|
@@ -445,7 +447,7 @@ struct rnwhisper_segments_callback_data {
|
|
|
445
447
|
}
|
|
446
448
|
|
|
447
449
|
- (void)stopTranscribe:(int)jobId {
|
|
448
|
-
if (self->recordState.job) self->recordState.job->abort();
|
|
450
|
+
if (self->recordState.job != nullptr) self->recordState.job->abort();
|
|
449
451
|
if (self->recordState.isRealtime && self->recordState.isCapturing) {
|
|
450
452
|
[self stopAudio];
|
|
451
453
|
if (!self->recordState.isTranscribing) {
|
|
@@ -0,0 +1,74 @@
|
|
|
1
|
+
<?xml version="1.0" encoding="UTF-8"?>
|
|
2
|
+
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
|
|
3
|
+
<plist version="1.0">
|
|
4
|
+
<dict>
|
|
5
|
+
<key>AvailableLibraries</key>
|
|
6
|
+
<array>
|
|
7
|
+
<dict>
|
|
8
|
+
<key>LibraryIdentifier</key>
|
|
9
|
+
<string>ios-arm64</string>
|
|
10
|
+
<key>LibraryPath</key>
|
|
11
|
+
<string>rnwhisper.framework</string>
|
|
12
|
+
<key>SupportedArchitectures</key>
|
|
13
|
+
<array>
|
|
14
|
+
<string>arm64</string>
|
|
15
|
+
</array>
|
|
16
|
+
<key>SupportedPlatform</key>
|
|
17
|
+
<string>ios</string>
|
|
18
|
+
</dict>
|
|
19
|
+
<dict>
|
|
20
|
+
<key>LibraryIdentifier</key>
|
|
21
|
+
<string>ios-arm64_x86_64-simulator</string>
|
|
22
|
+
<key>LibraryPath</key>
|
|
23
|
+
<string>rnwhisper.framework</string>
|
|
24
|
+
<key>SupportedArchitectures</key>
|
|
25
|
+
<array>
|
|
26
|
+
<string>arm64</string>
|
|
27
|
+
<string>x86_64</string>
|
|
28
|
+
</array>
|
|
29
|
+
<key>SupportedPlatform</key>
|
|
30
|
+
<string>ios</string>
|
|
31
|
+
<key>SupportedPlatformVariant</key>
|
|
32
|
+
<string>simulator</string>
|
|
33
|
+
</dict>
|
|
34
|
+
<dict>
|
|
35
|
+
<key>LibraryIdentifier</key>
|
|
36
|
+
<string>tvos-arm64</string>
|
|
37
|
+
<key>LibraryPath</key>
|
|
38
|
+
<string>rnwhisper.framework</string>
|
|
39
|
+
<key>SupportedArchitectures</key>
|
|
40
|
+
<array>
|
|
41
|
+
<string>arm64</string>
|
|
42
|
+
</array>
|
|
43
|
+
<key>SupportedPlatform</key>
|
|
44
|
+
<string>tvos</string>
|
|
45
|
+
</dict>
|
|
46
|
+
<dict>
|
|
47
|
+
<key>LibraryIdentifier</key>
|
|
48
|
+
<string>tvos-arm64_x86_64-simulator</string>
|
|
49
|
+
<key>LibraryPath</key>
|
|
50
|
+
<string>rnwhisper.framework</string>
|
|
51
|
+
<key>SupportedArchitectures</key>
|
|
52
|
+
<array>
|
|
53
|
+
<string>arm64</string>
|
|
54
|
+
<string>x86_64</string>
|
|
55
|
+
</array>
|
|
56
|
+
<key>SupportedPlatform</key>
|
|
57
|
+
<string>tvos</string>
|
|
58
|
+
<key>SupportedPlatformVariant</key>
|
|
59
|
+
<string>simulator</string>
|
|
60
|
+
</dict>
|
|
61
|
+
|
|
62
|
+
</array>
|
|
63
|
+
<key>CFBundlePackageType</key>
|
|
64
|
+
<string>XFWK</string>
|
|
65
|
+
<key>XCFrameworkFormatVersion</key>
|
|
66
|
+
<string>1.0</string>
|
|
67
|
+
<key>CFBundleVersion</key>
|
|
68
|
+
<string>1.0.0</string>
|
|
69
|
+
<key>CFBundleShortVersionString</key>
|
|
70
|
+
<string>1.0.0</string>
|
|
71
|
+
<key>CFBundleIdentifier</key>
|
|
72
|
+
<string>com.rnwhisper</string>
|
|
73
|
+
</dict>
|
|
74
|
+
</plist>
|
|
@@ -0,0 +1,76 @@
|
|
|
1
|
+
#pragma once
|
|
2
|
+
|
|
3
|
+
#include "ggml.h"
|
|
4
|
+
|
|
5
|
+
#ifdef __cplusplus
|
|
6
|
+
extern "C" {
|
|
7
|
+
#endif
|
|
8
|
+
|
|
9
|
+
typedef struct wsp_ggml_backend_buffer_type * wsp_ggml_backend_buffer_type_t;
|
|
10
|
+
typedef struct wsp_ggml_backend_buffer * wsp_ggml_backend_buffer_t;
|
|
11
|
+
typedef struct wsp_ggml_backend * wsp_ggml_backend_t;
|
|
12
|
+
|
|
13
|
+
// Tensor allocator
|
|
14
|
+
struct wsp_ggml_tallocr {
|
|
15
|
+
wsp_ggml_backend_buffer_t buffer;
|
|
16
|
+
void * base;
|
|
17
|
+
size_t alignment;
|
|
18
|
+
size_t offset;
|
|
19
|
+
};
|
|
20
|
+
|
|
21
|
+
WSP_GGML_API struct wsp_ggml_tallocr wsp_ggml_tallocr_new(wsp_ggml_backend_buffer_t buffer);
|
|
22
|
+
WSP_GGML_API enum wsp_ggml_status wsp_ggml_tallocr_alloc(struct wsp_ggml_tallocr * talloc, struct wsp_ggml_tensor * tensor);
|
|
23
|
+
|
|
24
|
+
// Graph allocator
|
|
25
|
+
/*
|
|
26
|
+
Example usage:
|
|
27
|
+
wsp_ggml_gallocr_t galloc = wsp_ggml_gallocr_new(wsp_ggml_backend_cpu_buffer_type());
|
|
28
|
+
|
|
29
|
+
// optional: create a worst-case graph and reserve the buffers to avoid reallocations
|
|
30
|
+
wsp_ggml_gallocr_reserve(galloc, build_graph(max_batch));
|
|
31
|
+
|
|
32
|
+
// allocate the graph
|
|
33
|
+
struct wsp_ggml_cgraph * graph = build_graph(batch);
|
|
34
|
+
wsp_ggml_gallocr_alloc_graph(galloc, graph);
|
|
35
|
+
|
|
36
|
+
printf("compute buffer size: %zu bytes\n", wsp_ggml_gallocr_get_buffer_size(galloc, 0));
|
|
37
|
+
|
|
38
|
+
// evaluate the graph
|
|
39
|
+
wsp_ggml_backend_graph_compute(backend, graph);
|
|
40
|
+
*/
|
|
41
|
+
|
|
42
|
+
// special tensor flags for use with the graph allocator:
|
|
43
|
+
// wsp_ggml_set_input(): all input tensors are allocated at the beginning of the graph in non-overlapping addresses
|
|
44
|
+
// wsp_ggml_set_output(): output tensors are never freed and never overwritten
|
|
45
|
+
|
|
46
|
+
typedef struct wsp_ggml_gallocr * wsp_ggml_gallocr_t;
|
|
47
|
+
|
|
48
|
+
WSP_GGML_API wsp_ggml_gallocr_t wsp_ggml_gallocr_new(wsp_ggml_backend_buffer_type_t buft);
|
|
49
|
+
WSP_GGML_API wsp_ggml_gallocr_t wsp_ggml_gallocr_new_n(wsp_ggml_backend_buffer_type_t * bufts, int n_bufs);
|
|
50
|
+
WSP_GGML_API void wsp_ggml_gallocr_free(wsp_ggml_gallocr_t galloc);
|
|
51
|
+
|
|
52
|
+
// pre-allocate buffers from a measure graph - does not allocate or modify the graph
|
|
53
|
+
// call with a worst-case graph to avoid buffer reallocations
|
|
54
|
+
// not strictly required for single buffer usage: wsp_ggml_gallocr_alloc_graph will reallocate the buffers automatically if needed
|
|
55
|
+
// returns false if the buffer allocation failed
|
|
56
|
+
WSP_GGML_API bool wsp_ggml_gallocr_reserve(wsp_ggml_gallocr_t galloc, struct wsp_ggml_cgraph * graph);
|
|
57
|
+
WSP_GGML_API bool wsp_ggml_gallocr_reserve_n(
|
|
58
|
+
wsp_ggml_gallocr_t galloc,
|
|
59
|
+
struct wsp_ggml_cgraph * graph,
|
|
60
|
+
const int * node_buffer_ids,
|
|
61
|
+
const int * leaf_buffer_ids);
|
|
62
|
+
|
|
63
|
+
// automatic reallocation if the topology changes when using a single buffer
|
|
64
|
+
// returns false if using multiple buffers and a re-allocation is needed (call wsp_ggml_gallocr_reserve_n first to set the node buffers)
|
|
65
|
+
WSP_GGML_API bool wsp_ggml_gallocr_alloc_graph(wsp_ggml_gallocr_t galloc, struct wsp_ggml_cgraph * graph);
|
|
66
|
+
|
|
67
|
+
WSP_GGML_API size_t wsp_ggml_gallocr_get_buffer_size(wsp_ggml_gallocr_t galloc, int buffer_id);
|
|
68
|
+
|
|
69
|
+
// Utils
|
|
70
|
+
// Create a buffer and allocate all the tensors in a wsp_ggml_context
|
|
71
|
+
WSP_GGML_API struct wsp_ggml_backend_buffer * wsp_ggml_backend_alloc_ctx_tensors_from_buft(struct wsp_ggml_context * ctx, wsp_ggml_backend_buffer_type_t buft);
|
|
72
|
+
WSP_GGML_API struct wsp_ggml_backend_buffer * wsp_ggml_backend_alloc_ctx_tensors(struct wsp_ggml_context * ctx, wsp_ggml_backend_t backend);
|
|
73
|
+
|
|
74
|
+
#ifdef __cplusplus
|
|
75
|
+
}
|
|
76
|
+
#endif
|