whisper.rn 0.4.0-rc.9 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (183)
  1. package/README.md +5 -1
  2. package/android/build.gradle +12 -3
  3. package/android/src/main/CMakeLists.txt +43 -13
  4. package/android/src/main/java/com/rnwhisper/WhisperContext.java +33 -35
  5. package/android/src/main/jni.cpp +9 -0
  6. package/android/src/main/jniLibs/arm64-v8a/librnwhisper.so +0 -0
  7. package/android/src/main/jniLibs/arm64-v8a/librnwhisper_v8fp16_va_2.so +0 -0
  8. package/android/src/main/jniLibs/armeabi-v7a/librnwhisper.so +0 -0
  9. package/android/src/main/jniLibs/armeabi-v7a/librnwhisper_vfpv4.so +0 -0
  10. package/android/src/main/jniLibs/x86_64/librnwhisper.so +0 -0
  11. package/android/src/main/jniLibs/x86_64/librnwhisper_x86_64.so +0 -0
  12. package/cpp/coreml/whisper-compat.h +10 -0
  13. package/cpp/coreml/whisper-compat.m +35 -0
  14. package/cpp/coreml/whisper-decoder-impl.h +27 -15
  15. package/cpp/coreml/whisper-decoder-impl.m +36 -10
  16. package/cpp/coreml/whisper-encoder-impl.h +21 -9
  17. package/cpp/coreml/whisper-encoder-impl.m +29 -3
  18. package/cpp/ggml-alloc.c +39 -37
  19. package/cpp/ggml-alloc.h +1 -1
  20. package/cpp/ggml-backend-impl.h +55 -27
  21. package/cpp/ggml-backend-reg.cpp +591 -0
  22. package/cpp/ggml-backend.cpp +336 -955
  23. package/cpp/ggml-backend.h +70 -42
  24. package/cpp/ggml-common.h +57 -49
  25. package/cpp/ggml-cpp.h +39 -0
  26. package/cpp/ggml-cpu/amx/amx.cpp +221 -0
  27. package/cpp/ggml-cpu/amx/amx.h +8 -0
  28. package/cpp/ggml-cpu/amx/common.h +91 -0
  29. package/cpp/ggml-cpu/amx/mmq.cpp +2511 -0
  30. package/cpp/ggml-cpu/amx/mmq.h +10 -0
  31. package/cpp/ggml-cpu/arch/arm/cpu-feats.cpp +94 -0
  32. package/cpp/ggml-cpu/arch/arm/quants.c +4113 -0
  33. package/cpp/ggml-cpu/arch/arm/repack.cpp +2162 -0
  34. package/cpp/ggml-cpu/arch/x86/cpu-feats.cpp +327 -0
  35. package/cpp/ggml-cpu/arch/x86/quants.c +4310 -0
  36. package/cpp/ggml-cpu/arch/x86/repack.cpp +3284 -0
  37. package/cpp/ggml-cpu/arch-fallback.h +184 -0
  38. package/cpp/ggml-cpu/binary-ops.cpp +158 -0
  39. package/cpp/ggml-cpu/binary-ops.h +16 -0
  40. package/cpp/ggml-cpu/common.h +72 -0
  41. package/cpp/ggml-cpu/ggml-cpu-impl.h +511 -0
  42. package/cpp/ggml-cpu/ggml-cpu.c +3473 -0
  43. package/cpp/ggml-cpu/ggml-cpu.cpp +671 -0
  44. package/cpp/ggml-cpu/ops.cpp +9085 -0
  45. package/cpp/ggml-cpu/ops.h +111 -0
  46. package/cpp/ggml-cpu/quants.c +1157 -0
  47. package/cpp/ggml-cpu/quants.h +89 -0
  48. package/cpp/ggml-cpu/repack.cpp +1570 -0
  49. package/cpp/ggml-cpu/repack.h +98 -0
  50. package/cpp/ggml-cpu/simd-mappings.h +1006 -0
  51. package/cpp/ggml-cpu/traits.cpp +36 -0
  52. package/cpp/ggml-cpu/traits.h +38 -0
  53. package/cpp/ggml-cpu/unary-ops.cpp +186 -0
  54. package/cpp/ggml-cpu/unary-ops.h +28 -0
  55. package/cpp/ggml-cpu/vec.cpp +321 -0
  56. package/cpp/ggml-cpu/vec.h +973 -0
  57. package/cpp/ggml-cpu.h +143 -0
  58. package/cpp/ggml-impl.h +417 -23
  59. package/cpp/ggml-metal-impl.h +622 -0
  60. package/cpp/ggml-metal.h +9 -9
  61. package/cpp/ggml-metal.m +3451 -1344
  62. package/cpp/ggml-opt.cpp +1037 -0
  63. package/cpp/ggml-opt.h +237 -0
  64. package/cpp/ggml-quants.c +296 -10818
  65. package/cpp/ggml-quants.h +78 -125
  66. package/cpp/ggml-threading.cpp +12 -0
  67. package/cpp/ggml-threading.h +14 -0
  68. package/cpp/ggml-whisper-sim.metallib +0 -0
  69. package/cpp/ggml-whisper.metallib +0 -0
  70. package/cpp/ggml.c +4633 -21450
  71. package/cpp/ggml.h +320 -661
  72. package/cpp/gguf.cpp +1347 -0
  73. package/cpp/gguf.h +202 -0
  74. package/cpp/rn-whisper.cpp +4 -11
  75. package/cpp/whisper-arch.h +197 -0
  76. package/cpp/whisper.cpp +2022 -495
  77. package/cpp/whisper.h +75 -18
  78. package/ios/CMakeLists.txt +95 -0
  79. package/ios/RNWhisper.h +5 -0
  80. package/ios/RNWhisperAudioUtils.m +4 -0
  81. package/ios/RNWhisperContext.h +5 -0
  82. package/ios/RNWhisperContext.mm +4 -2
  83. package/ios/rnwhisper.xcframework/Info.plist +74 -0
  84. package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml-alloc.h +76 -0
  85. package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml-backend-impl.h +255 -0
  86. package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml-backend.h +354 -0
  87. package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml-common.h +1861 -0
  88. package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml-cpp.h +39 -0
  89. package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml-cpu.h +143 -0
  90. package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml-impl.h +603 -0
  91. package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml-metal-impl.h +622 -0
  92. package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml-metal.h +66 -0
  93. package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml-opt.h +237 -0
  94. package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml-quants.h +100 -0
  95. package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml-threading.h +14 -0
  96. package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml.h +2221 -0
  97. package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/gguf.h +202 -0
  98. package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/rn-audioutils.h +14 -0
  99. package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/rn-whisper-log.h +11 -0
  100. package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/rn-whisper.h +52 -0
  101. package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/whisper-arch.h +197 -0
  102. package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/whisper.h +739 -0
  103. package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Info.plist +0 -0
  104. package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/ggml-whisper.metallib +0 -0
  105. package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/rnwhisper +0 -0
  106. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-alloc.h +76 -0
  107. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-backend-impl.h +255 -0
  108. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-backend.h +354 -0
  109. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-common.h +1861 -0
  110. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-cpp.h +39 -0
  111. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-cpu.h +143 -0
  112. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-impl.h +603 -0
  113. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-metal-impl.h +622 -0
  114. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-metal.h +66 -0
  115. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-opt.h +237 -0
  116. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-quants.h +100 -0
  117. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-threading.h +14 -0
  118. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml.h +2221 -0
  119. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/gguf.h +202 -0
  120. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/rn-audioutils.h +14 -0
  121. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/rn-whisper-log.h +11 -0
  122. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/rn-whisper.h +52 -0
  123. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/whisper-arch.h +197 -0
  124. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/whisper.h +739 -0
  125. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Info.plist +0 -0
  126. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/_CodeSignature/CodeResources +101 -0
  127. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/ggml-whisper-sim.metallib +0 -0
  128. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/rnwhisper +0 -0
  129. package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml-alloc.h +76 -0
  130. package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml-backend-impl.h +255 -0
  131. package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml-backend.h +354 -0
  132. package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml-common.h +1861 -0
  133. package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml-cpp.h +39 -0
  134. package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml-cpu.h +143 -0
  135. package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml-impl.h +603 -0
  136. package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml-metal-impl.h +622 -0
  137. package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml-metal.h +66 -0
  138. package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml-opt.h +237 -0
  139. package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml-quants.h +100 -0
  140. package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml-threading.h +14 -0
  141. package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml.h +2221 -0
  142. package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/gguf.h +202 -0
  143. package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/rn-audioutils.h +14 -0
  144. package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/rn-whisper-log.h +11 -0
  145. package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/rn-whisper.h +52 -0
  146. package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/whisper-arch.h +197 -0
  147. package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/whisper.h +739 -0
  148. package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Info.plist +0 -0
  149. package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/ggml-whisper.metallib +0 -0
  150. package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/rnwhisper +0 -0
  151. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-alloc.h +76 -0
  152. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-backend-impl.h +255 -0
  153. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-backend.h +354 -0
  154. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-common.h +1861 -0
  155. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-cpp.h +39 -0
  156. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-cpu.h +143 -0
  157. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-impl.h +603 -0
  158. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-metal-impl.h +622 -0
  159. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-metal.h +66 -0
  160. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-opt.h +237 -0
  161. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-quants.h +100 -0
  162. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-threading.h +14 -0
  163. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml.h +2221 -0
  164. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/gguf.h +202 -0
  165. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/rn-audioutils.h +14 -0
  166. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/rn-whisper-log.h +11 -0
  167. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/rn-whisper.h +52 -0
  168. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/whisper-arch.h +197 -0
  169. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/whisper.h +739 -0
  170. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Info.plist +0 -0
  171. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/_CodeSignature/CodeResources +101 -0
  172. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/ggml-whisper-sim.metallib +0 -0
  173. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/rnwhisper +0 -0
  174. package/jest/mock.js +5 -0
  175. package/lib/commonjs/version.json +1 -1
  176. package/lib/module/version.json +1 -1
  177. package/package.json +10 -6
  178. package/src/version.json +1 -1
  179. package/whisper-rn.podspec +11 -18
  180. package/cpp/README.md +0 -4
  181. package/cpp/ggml-aarch64.c +0 -3209
  182. package/cpp/ggml-aarch64.h +0 -39
  183. package/cpp/ggml-cpu-impl.h +0 -614
package/cpp/whisper.h CHANGED
@@ -2,6 +2,7 @@
2
2
  #define WHISPER_H
3
3
 
4
4
  #include "ggml.h"
5
+ #include "ggml-cpu.h"
5
6
 
6
7
  #include <stddef.h>
7
8
  #include <stdint.h>
@@ -189,6 +190,15 @@ extern "C" {
189
190
  uint32_t value; // Unicode code point or rule ID
190
191
  } whisper_grammar_element;
191
192
 
193
+ typedef struct whisper_vad_params {
194
+ float threshold; // Probability threshold to consider as speech.
195
+ int min_speech_duration_ms; // Min duration for a valid speech segment.
196
+ int min_silence_duration_ms; // Min silence duration to consider speech as ended.
197
+ float max_speech_duration_s; // Max duration of a speech segment before forcing a new segment.
198
+ int speech_pad_ms; // Padding added before and after speech segments.
199
+ float samples_overlap; // Overlap in seconds when copying audio samples from speech segment.
200
+ } whisper_vad_params;
201
+
192
202
  // Various functions for loading a ggml whisper model.
193
203
  // Allocate (almost) all memory needed for the model.
194
204
  // Return NULL on failure
@@ -425,21 +435,11 @@ extern "C" {
425
435
 
426
436
  // Performance information from the default state.
427
437
  struct whisper_timings {
428
- int64_t load_us;
429
- int64_t t_start_us;
430
- int32_t fail_p;
431
- int32_t fail_h;
432
- int64_t t_mel_us;
433
- int32_t n_sample;
434
- int32_t n_encode;
435
- int32_t n_decode;
436
- int32_t n_batchd;
437
- int32_t n_prompt;
438
- int64_t t_sample_us;
439
- int64_t t_encode_us;
440
- int64_t t_decode_us;
441
- int64_t t_batchd_us;
442
- int64_t t_prompt_us;
438
+ float sample_ms;
439
+ float encode_ms;
440
+ float decode_ms;
441
+ float batchd_ms;
442
+ float prompt_ms;
443
443
  };
444
444
  WHISPER_API struct whisper_timings * whisper_get_timings(struct whisper_context * ctx);
445
445
  WHISPER_API void whisper_print_timings(struct whisper_context * ctx);
@@ -532,8 +532,8 @@ extern "C" {
532
532
  bool detect_language;
533
533
 
534
534
  // common decoding parameters:
535
- bool suppress_blank; // ref: https://github.com/openai/whisper/blob/f82bc59f5ea234d4b97fb2860842ed38519f7e65/whisper/decoding.py#L89
536
- bool suppress_non_speech_tokens; // ref: https://github.com/openai/whisper/blob/7858aa9c08d98f75575035ecd6481f462d66ca27/whisper/tokenizer.py#L224-L253
535
+ bool suppress_blank; // ref: https://github.com/openai/whisper/blob/f82bc59f5ea234d4b97fb2860842ed38519f7e65/whisper/decoding.py#L89
536
+ bool suppress_nst; // non-speech tokens, ref: https://github.com/openai/whisper/blob/7858aa9c08d98f75575035ecd6481f462d66ca27/whisper/tokenizer.py#L224-L253
537
537
 
538
538
  float temperature; // initial decoding temperature, ref: https://ai.stackexchange.com/a/32478
539
539
  float max_initial_ts; // ref: https://github.com/openai/whisper/blob/f82bc59f5ea234d4b97fb2860842ed38519f7e65/whisper/decoding.py#L97
@@ -544,7 +544,7 @@ extern "C" {
544
544
  float temperature_inc;
545
545
  float entropy_thold; // similar to OpenAI's "compression_ratio_threshold"
546
546
  float logprob_thold;
547
- float no_speech_thold; // TODO: not implemented
547
+ float no_speech_thold;
548
548
 
549
549
  struct {
550
550
  int best_of; // ref: https://github.com/openai/whisper/blob/f82bc59f5ea234d4b97fb2860842ed38519f7e65/whisper/transcribe.py#L264
@@ -580,11 +580,18 @@ extern "C" {
580
580
  size_t n_grammar_rules;
581
581
  size_t i_start_rule;
582
582
  float grammar_penalty;
583
+
584
+ // Voice Activity Detection (VAD) params
585
+ bool vad; // Enable VAD
586
+ const char * vad_model_path; // Path to VAD model
587
+
588
+ whisper_vad_params vad_params;
583
589
  };
584
590
 
585
591
  // NOTE: this function allocates memory, and it is the responsibility of the caller to free the pointer - see whisper_free_context_params & whisper_free_params()
586
592
  WHISPER_API struct whisper_context_params * whisper_context_default_params_by_ref(void);
587
593
  WHISPER_API struct whisper_context_params whisper_context_default_params (void);
594
+
588
595
  WHISPER_API struct whisper_full_params * whisper_full_default_params_by_ref(enum whisper_sampling_strategy strategy);
589
596
  WHISPER_API struct whisper_full_params whisper_full_default_params (enum whisper_sampling_strategy strategy);
590
597
 
@@ -662,6 +669,53 @@ extern "C" {
662
669
  WHISPER_API float whisper_full_get_token_p (struct whisper_context * ctx, int i_segment, int i_token);
663
670
  WHISPER_API float whisper_full_get_token_p_from_state(struct whisper_state * state, int i_segment, int i_token);
664
671
 
672
+ //
673
+ // Voice Activity Detection (VAD)
674
+ //
675
+
676
+ struct whisper_vad_context;
677
+
678
+ WHISPER_API struct whisper_vad_params whisper_vad_default_params(void);
679
+
680
+ struct whisper_vad_context_params {
681
+ int n_threads; // The number of threads to use for processing.
682
+ bool use_gpu;
683
+ int gpu_device; // CUDA device
684
+ };
685
+
686
+ WHISPER_API struct whisper_vad_context_params whisper_vad_default_context_params(void);
687
+
688
+ WHISPER_API struct whisper_vad_context * whisper_vad_init_from_file_with_params(const char * path_model, struct whisper_vad_context_params params);
689
+ WHISPER_API struct whisper_vad_context * whisper_vad_init_with_params (struct whisper_model_loader * loader, struct whisper_vad_context_params params);
690
+
691
+ WHISPER_API bool whisper_vad_detect_speech(
692
+ struct whisper_vad_context * vctx,
693
+ const float * samples,
694
+ int n_samples);
695
+
696
+ WHISPER_API int whisper_vad_n_probs(struct whisper_vad_context * vctx);
697
+ WHISPER_API float * whisper_vad_probs (struct whisper_vad_context * vctx);
698
+
699
+ struct whisper_vad_segments;
700
+
701
+ WHISPER_API struct whisper_vad_segments * whisper_vad_segments_from_probs(
702
+ struct whisper_vad_context * vctx,
703
+ struct whisper_vad_params params);
704
+
705
+ WHISPER_API struct whisper_vad_segments * whisper_vad_segments_from_samples(
706
+ struct whisper_vad_context * vctx,
707
+ struct whisper_vad_params params,
708
+ const float * samples,
709
+ int n_samples);
710
+
711
+ WHISPER_API int whisper_vad_segments_n_segments(struct whisper_vad_segments * segments);
712
+
713
+ WHISPER_API float whisper_vad_segments_get_segment_t0(struct whisper_vad_segments * segments, int i_segment);
714
+ WHISPER_API float whisper_vad_segments_get_segment_t1(struct whisper_vad_segments * segments, int i_segment);
715
+
716
+ WHISPER_API void whisper_vad_free_segments(struct whisper_vad_segments * segments);
717
+ WHISPER_API void whisper_vad_free (struct whisper_vad_context * ctx);
718
+
665
719
  ////////////////////////////////////////////////////////////////////////////
666
720
 
667
721
  // Temporary helpers needed for exposing ggml interface
@@ -675,6 +729,9 @@ extern "C" {
675
729
 
676
730
  WHISPER_API void whisper_log_set(wsp_ggml_log_callback log_callback, void * user_data);
677
731
 
732
+ // Get the no_speech probability for the specified segment
733
+ WHISPER_API float whisper_full_get_segment_no_speech_prob (struct whisper_context * ctx, int i_segment);
734
+ WHISPER_API float whisper_full_get_segment_no_speech_prob_from_state(struct whisper_state * state, int i_segment);
678
735
  #ifdef __cplusplus
679
736
  }
680
737
  #endif
package/ios/CMakeLists.txt ADDED
@@ -0,0 +1,95 @@
1
+ cmake_minimum_required(VERSION 3.16)
2
+ project(rnwhisper VERSION 1.0.0 LANGUAGES CXX C)
3
+
4
+ set(CMAKE_CXX_STANDARD 17)
5
+ set(CMAKE_CXX_STANDARD_REQUIRED ON)
6
+
7
+ # iOS specific settings
8
+ set(CMAKE_OSX_DEPLOYMENT_TARGET 13.0)
9
+ set(CMAKE_XCODE_ATTRIBUTE_ENABLE_BITCODE NO)
10
+
11
+ # Dependencies and compile options
12
+ add_definitions(
13
+ -DNDEBUG
14
+ -DO3
15
+ -DWSP_GGML_USE_CPU
16
+ -DWSP_GGML_USE_ACCELERATE
17
+ -DWSP_GGML_USE_METAL
18
+ -DWSP_GGML_METAL_USE_BF16
19
+ )
20
+
21
+ if (CMAKE_OSX_ARCHITECTURES STREQUAL "arm64;x86_64")
22
+ add_definitions(-DWSP_GGML_CPU_GENERIC)
23
+ endif ()
24
+
25
+ set(SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/../cpp)
26
+
27
+ if (CMAKE_OSX_ARCHITECTURES STREQUAL "arm64")
28
+ set(SOURCE_FILES_ARCH
29
+ ${SOURCE_DIR}/ggml-cpu/arch/arm/quants.c
30
+ ${SOURCE_DIR}/ggml-cpu/arch/arm/repack.cpp
31
+ )
32
+ endif ()
33
+
34
+ # Define public headers
35
+ set(PUBLIC_HEADERS
36
+ ${SOURCE_DIR}/rn-whisper.h
37
+ ${SOURCE_DIR}/whisper.h
38
+ ${SOURCE_DIR}/ggml.h
39
+ )
40
+
41
+ # Create library target
42
+ add_library(rnwhisper SHARED
43
+ ${SOURCE_DIR}/ggml.c
44
+ ${SOURCE_DIR}/ggml-alloc.c
45
+ ${SOURCE_DIR}/ggml-backend.cpp
46
+ ${SOURCE_DIR}/ggml-backend-reg.cpp
47
+ ${SOURCE_DIR}/ggml-cpu/amx/amx.cpp
48
+ ${SOURCE_DIR}/ggml-cpu/amx/mmq.cpp
49
+ ${SOURCE_DIR}/ggml-cpu/ggml-cpu.c
50
+ ${SOURCE_DIR}/ggml-cpu/ggml-cpu.cpp
51
+ ${SOURCE_DIR}/ggml-cpu/quants.c
52
+ ${SOURCE_DIR}/ggml-cpu/traits.cpp
53
+ ${SOURCE_DIR}/ggml-cpu/repack.cpp
54
+ ${SOURCE_DIR}/ggml-cpu/unary-ops.cpp
55
+ ${SOURCE_DIR}/ggml-cpu/binary-ops.cpp
56
+ ${SOURCE_DIR}/ggml-cpu/vec.cpp
57
+ ${SOURCE_DIR}/ggml-cpu/ops.cpp
58
+ ${SOURCE_DIR}/ggml-metal.m
59
+ ${SOURCE_DIR}/ggml-opt.cpp
60
+ ${SOURCE_DIR}/ggml-threading.cpp
61
+ ${SOURCE_DIR}/ggml-quants.c
62
+ ${SOURCE_DIR}/gguf.cpp
63
+ ${SOURCE_DIR}/whisper.cpp
64
+ ${SOURCE_DIR}/rn-whisper.cpp
65
+ ${SOURCE_DIR}/rn-audioutils.cpp
66
+ ${SOURCE_FILES_ARCH}
67
+ )
68
+
69
+ # Setup include directories
70
+ target_include_directories(rnwhisper
71
+ PUBLIC
72
+ $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/../cpp>
73
+ $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/../cpp/ggml-cpu>
74
+ $<INSTALL_INTERFACE:include>
75
+ )
76
+
77
+ # Link required frameworks
78
+ target_link_libraries(rnwhisper PRIVATE
79
+ "-framework Accelerate"
80
+ "-framework Foundation"
81
+ "-framework Metal"
82
+ "-framework MetalKit"
83
+ )
84
+
85
+ # Set properties for framework
86
+ set_target_properties(rnwhisper PROPERTIES
87
+ MACOSX_FRAMEWORK_IDENTIFIER "com.rnwhisper"
88
+ MACOSX_FRAMEWORK_BUNDLE_VERSION 1.0.0
89
+ MACOSX_FRAMEWORK_SHORT_VERSION_STRING 1.0.0
90
+ FRAMEWORK TRUE
91
+ FRAMEWORK_VERSION 1.0.0
92
+ VERSION 1.0.0
93
+ PUBLIC_HEADER "${PUBLIC_HEADERS}"
94
+ XCODE_ATTRIBUTE_CLANG_ENABLE_OBJC_ARC NO
95
+ )
package/ios/RNWhisper.h CHANGED
@@ -1,6 +1,11 @@
1
1
  #ifdef __cplusplus
2
+ #if RNWHISPER_BUILD_FROM_SOURCE
2
3
  #import "whisper.h"
3
4
  #import "rn-whisper.h"
5
+ #else
6
+ #import <rnwhisper/whisper.h>
7
+ #import <rnwhisper/rn-whisper.h>
8
+ #endif
4
9
  #endif
5
10
 
6
11
  #import <React/RCTBridgeModule.h>
package/ios/RNWhisperAudioUtils.m CHANGED
@@ -1,5 +1,9 @@
1
1
  #import "RNWhisperAudioUtils.h"
2
+ #if RNWHISPER_BUILD_FROM_SOURCE
2
3
  #import "whisper.h"
4
+ #else
5
+ #import <rnwhisper/whisper.h>
6
+ #endif
3
7
 
4
8
  @implementation RNWhisperAudioUtils
5
9
 
package/ios/RNWhisperContext.h CHANGED
@@ -1,6 +1,11 @@
1
1
  #ifdef __cplusplus
2
+ #if RNWHISPER_BUILD_FROM_SOURCE
2
3
  #import "whisper.h"
3
4
  #import "rn-whisper.h"
5
+ #else
6
+ #import <rnwhisper/whisper.h>
7
+ #import <rnwhisper/rn-whisper.h>
8
+ #endif
4
9
  #endif
5
10
 
6
11
  #import <AVFoundation/AVFoundation.h>
package/ios/RNWhisperContext.mm CHANGED
@@ -19,8 +19,9 @@
19
19
  cparams.use_gpu = !noMetal;
20
20
  cparams.flash_attn = useFlashAttn;
21
21
 
22
- // TODO: Figure out why it leads to re-init crash
22
+ // TODO: Expose dtw_token_timestamps and dtw_aheads_preset
23
23
  cparams.dtw_token_timestamps = false;
24
+ // cparams.dtw_aheads_preset = WHISPER_AHEADS_BASE;
24
25
 
25
26
  cparams.use_coreml = !noCoreML;
26
27
  #ifndef WHISPER_USE_COREML
@@ -431,6 +432,7 @@ struct rnwhisper_segments_callback_data {
431
432
  self->recordState.job = job;
432
433
  int code = [self fullTranscribe:job audioData:audioData audioDataCount:audioDataCount];
433
434
  rnwhisper::job_remove(jobId);
435
+ self->recordState.job = nullptr;
434
436
  self->recordState.isTranscribing = false;
435
437
  onEnd(code);
436
438
  });
@@ -445,7 +447,7 @@ struct rnwhisper_segments_callback_data {
445
447
  }
446
448
 
447
449
  - (void)stopTranscribe:(int)jobId {
448
- if (self->recordState.job) self->recordState.job->abort();
450
+ if (self->recordState.job != nullptr) self->recordState.job->abort();
449
451
  if (self->recordState.isRealtime && self->recordState.isCapturing) {
450
452
  [self stopAudio];
451
453
  if (!self->recordState.isTranscribing) {
package/ios/rnwhisper.xcframework/Info.plist ADDED
@@ -0,0 +1,74 @@
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
3
+ <plist version="1.0">
4
+ <dict>
5
+ <key>AvailableLibraries</key>
6
+ <array>
7
+ <dict>
8
+ <key>LibraryIdentifier</key>
9
+ <string>ios-arm64</string>
10
+ <key>LibraryPath</key>
11
+ <string>rnwhisper.framework</string>
12
+ <key>SupportedArchitectures</key>
13
+ <array>
14
+ <string>arm64</string>
15
+ </array>
16
+ <key>SupportedPlatform</key>
17
+ <string>ios</string>
18
+ </dict>
19
+ <dict>
20
+ <key>LibraryIdentifier</key>
21
+ <string>ios-arm64_x86_64-simulator</string>
22
+ <key>LibraryPath</key>
23
+ <string>rnwhisper.framework</string>
24
+ <key>SupportedArchitectures</key>
25
+ <array>
26
+ <string>arm64</string>
27
+ <string>x86_64</string>
28
+ </array>
29
+ <key>SupportedPlatform</key>
30
+ <string>ios</string>
31
+ <key>SupportedPlatformVariant</key>
32
+ <string>simulator</string>
33
+ </dict>
34
+ <dict>
35
+ <key>LibraryIdentifier</key>
36
+ <string>tvos-arm64</string>
37
+ <key>LibraryPath</key>
38
+ <string>rnwhisper.framework</string>
39
+ <key>SupportedArchitectures</key>
40
+ <array>
41
+ <string>arm64</string>
42
+ </array>
43
+ <key>SupportedPlatform</key>
44
+ <string>tvos</string>
45
+ </dict>
46
+ <dict>
47
+ <key>LibraryIdentifier</key>
48
+ <string>tvos-arm64_x86_64-simulator</string>
49
+ <key>LibraryPath</key>
50
+ <string>rnwhisper.framework</string>
51
+ <key>SupportedArchitectures</key>
52
+ <array>
53
+ <string>arm64</string>
54
+ <string>x86_64</string>
55
+ </array>
56
+ <key>SupportedPlatform</key>
57
+ <string>tvos</string>
58
+ <key>SupportedPlatformVariant</key>
59
+ <string>simulator</string>
60
+ </dict>
61
+
62
+ </array>
63
+ <key>CFBundlePackageType</key>
64
+ <string>XFWK</string>
65
+ <key>XCFrameworkFormatVersion</key>
66
+ <string>1.0</string>
67
+ <key>CFBundleVersion</key>
68
+ <string>1.0.0</string>
69
+ <key>CFBundleShortVersionString</key>
70
+ <string>1.0.0</string>
71
+ <key>CFBundleIdentifier</key>
72
+ <string>com.rnwhisper</string>
73
+ </dict>
74
+ </plist>
package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml-alloc.h ADDED
@@ -0,0 +1,76 @@
1
+ #pragma once
2
+
3
+ #include "ggml.h"
4
+
5
+ #ifdef __cplusplus
6
+ extern "C" {
7
+ #endif
8
+
9
+ typedef struct wsp_ggml_backend_buffer_type * wsp_ggml_backend_buffer_type_t;
10
+ typedef struct wsp_ggml_backend_buffer * wsp_ggml_backend_buffer_t;
11
+ typedef struct wsp_ggml_backend * wsp_ggml_backend_t;
12
+
13
+ // Tensor allocator
14
+ struct wsp_ggml_tallocr {
15
+ wsp_ggml_backend_buffer_t buffer;
16
+ void * base;
17
+ size_t alignment;
18
+ size_t offset;
19
+ };
20
+
21
+ WSP_GGML_API struct wsp_ggml_tallocr wsp_ggml_tallocr_new(wsp_ggml_backend_buffer_t buffer);
22
+ WSP_GGML_API enum wsp_ggml_status wsp_ggml_tallocr_alloc(struct wsp_ggml_tallocr * talloc, struct wsp_ggml_tensor * tensor);
23
+
24
+ // Graph allocator
25
+ /*
26
+ Example usage:
27
+ wsp_ggml_gallocr_t galloc = wsp_ggml_gallocr_new(wsp_ggml_backend_cpu_buffer_type());
28
+
29
+ // optional: create a worst-case graph and reserve the buffers to avoid reallocations
30
+ wsp_ggml_gallocr_reserve(galloc, build_graph(max_batch));
31
+
32
+ // allocate the graph
33
+ struct wsp_ggml_cgraph * graph = build_graph(batch);
34
+ wsp_ggml_gallocr_alloc_graph(galloc, graph);
35
+
36
+ printf("compute buffer size: %zu bytes\n", wsp_ggml_gallocr_get_buffer_size(galloc, 0));
37
+
38
+ // evaluate the graph
39
+ wsp_ggml_backend_graph_compute(backend, graph);
40
+ */
41
+
42
+ // special tensor flags for use with the graph allocator:
43
+ // wsp_ggml_set_input(): all input tensors are allocated at the beginning of the graph in non-overlapping addresses
44
+ // wsp_ggml_set_output(): output tensors are never freed and never overwritten
45
+
46
+ typedef struct wsp_ggml_gallocr * wsp_ggml_gallocr_t;
47
+
48
+ WSP_GGML_API wsp_ggml_gallocr_t wsp_ggml_gallocr_new(wsp_ggml_backend_buffer_type_t buft);
49
+ WSP_GGML_API wsp_ggml_gallocr_t wsp_ggml_gallocr_new_n(wsp_ggml_backend_buffer_type_t * bufts, int n_bufs);
50
+ WSP_GGML_API void wsp_ggml_gallocr_free(wsp_ggml_gallocr_t galloc);
51
+
52
+ // pre-allocate buffers from a measure graph - does not allocate or modify the graph
53
+ // call with a worst-case graph to avoid buffer reallocations
54
+ // not strictly required for single buffer usage: wsp_ggml_gallocr_alloc_graph will reallocate the buffers automatically if needed
55
+ // returns false if the buffer allocation failed
56
+ WSP_GGML_API bool wsp_ggml_gallocr_reserve(wsp_ggml_gallocr_t galloc, struct wsp_ggml_cgraph * graph);
57
+ WSP_GGML_API bool wsp_ggml_gallocr_reserve_n(
58
+ wsp_ggml_gallocr_t galloc,
59
+ struct wsp_ggml_cgraph * graph,
60
+ const int * node_buffer_ids,
61
+ const int * leaf_buffer_ids);
62
+
63
+ // automatic reallocation if the topology changes when using a single buffer
64
+ // returns false if using multiple buffers and a re-allocation is needed (call wsp_ggml_gallocr_reserve_n first to set the node buffers)
65
+ WSP_GGML_API bool wsp_ggml_gallocr_alloc_graph(wsp_ggml_gallocr_t galloc, struct wsp_ggml_cgraph * graph);
66
+
67
+ WSP_GGML_API size_t wsp_ggml_gallocr_get_buffer_size(wsp_ggml_gallocr_t galloc, int buffer_id);
68
+
69
+ // Utils
70
+ // Create a buffer and allocate all the tensors in a wsp_ggml_context
71
+ WSP_GGML_API struct wsp_ggml_backend_buffer * wsp_ggml_backend_alloc_ctx_tensors_from_buft(struct wsp_ggml_context * ctx, wsp_ggml_backend_buffer_type_t buft);
72
+ WSP_GGML_API struct wsp_ggml_backend_buffer * wsp_ggml_backend_alloc_ctx_tensors(struct wsp_ggml_context * ctx, wsp_ggml_backend_t backend);
73
+
74
+ #ifdef __cplusplus
75
+ }
76
+ #endif