whisper.rn 0.4.0-rc.8 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (201) hide show
  1. package/README.md +5 -1
  2. package/android/build.gradle +12 -3
  3. package/android/src/main/CMakeLists.txt +44 -13
  4. package/android/src/main/java/com/rnwhisper/AudioUtils.java +27 -12
  5. package/android/src/main/java/com/rnwhisper/RNWhisper.java +75 -34
  6. package/android/src/main/java/com/rnwhisper/WhisperContext.java +53 -38
  7. package/android/src/main/jni.cpp +38 -1
  8. package/android/src/main/jniLibs/arm64-v8a/librnwhisper.so +0 -0
  9. package/android/src/main/jniLibs/arm64-v8a/librnwhisper_v8fp16_va_2.so +0 -0
  10. package/android/src/main/jniLibs/armeabi-v7a/librnwhisper.so +0 -0
  11. package/android/src/main/jniLibs/armeabi-v7a/librnwhisper_vfpv4.so +0 -0
  12. package/android/src/main/jniLibs/x86_64/librnwhisper.so +0 -0
  13. package/android/src/main/jniLibs/x86_64/librnwhisper_x86_64.so +0 -0
  14. package/android/src/newarch/java/com/rnwhisper/RNWhisperModule.java +10 -0
  15. package/android/src/oldarch/java/com/rnwhisper/RNWhisperModule.java +10 -0
  16. package/cpp/coreml/whisper-compat.h +10 -0
  17. package/cpp/coreml/whisper-compat.m +35 -0
  18. package/cpp/coreml/whisper-decoder-impl.h +27 -15
  19. package/cpp/coreml/whisper-decoder-impl.m +36 -10
  20. package/cpp/coreml/whisper-encoder-impl.h +21 -9
  21. package/cpp/coreml/whisper-encoder-impl.m +29 -3
  22. package/cpp/ggml-alloc.c +727 -517
  23. package/cpp/ggml-alloc.h +47 -65
  24. package/cpp/ggml-backend-impl.h +196 -57
  25. package/cpp/ggml-backend-reg.cpp +591 -0
  26. package/cpp/ggml-backend.cpp +2016 -0
  27. package/cpp/ggml-backend.h +234 -89
  28. package/cpp/ggml-common.h +1861 -0
  29. package/cpp/ggml-cpp.h +39 -0
  30. package/cpp/ggml-cpu/amx/amx.cpp +221 -0
  31. package/cpp/ggml-cpu/amx/amx.h +8 -0
  32. package/cpp/ggml-cpu/amx/common.h +91 -0
  33. package/cpp/ggml-cpu/amx/mmq.cpp +2511 -0
  34. package/cpp/ggml-cpu/amx/mmq.h +10 -0
  35. package/cpp/ggml-cpu/arch/arm/cpu-feats.cpp +94 -0
  36. package/cpp/ggml-cpu/arch/arm/quants.c +4113 -0
  37. package/cpp/ggml-cpu/arch/arm/repack.cpp +2162 -0
  38. package/cpp/ggml-cpu/arch/x86/cpu-feats.cpp +327 -0
  39. package/cpp/ggml-cpu/arch/x86/quants.c +4310 -0
  40. package/cpp/ggml-cpu/arch/x86/repack.cpp +3284 -0
  41. package/cpp/ggml-cpu/arch-fallback.h +184 -0
  42. package/cpp/ggml-cpu/binary-ops.cpp +158 -0
  43. package/cpp/ggml-cpu/binary-ops.h +16 -0
  44. package/cpp/ggml-cpu/common.h +72 -0
  45. package/cpp/ggml-cpu/ggml-cpu-impl.h +511 -0
  46. package/cpp/ggml-cpu/ggml-cpu.c +3473 -0
  47. package/cpp/ggml-cpu/ggml-cpu.cpp +671 -0
  48. package/cpp/ggml-cpu/ops.cpp +9085 -0
  49. package/cpp/ggml-cpu/ops.h +111 -0
  50. package/cpp/ggml-cpu/quants.c +1157 -0
  51. package/cpp/ggml-cpu/quants.h +89 -0
  52. package/cpp/ggml-cpu/repack.cpp +1570 -0
  53. package/cpp/ggml-cpu/repack.h +98 -0
  54. package/cpp/ggml-cpu/simd-mappings.h +1006 -0
  55. package/cpp/ggml-cpu/traits.cpp +36 -0
  56. package/cpp/ggml-cpu/traits.h +38 -0
  57. package/cpp/ggml-cpu/unary-ops.cpp +186 -0
  58. package/cpp/ggml-cpu/unary-ops.h +28 -0
  59. package/cpp/ggml-cpu/vec.cpp +321 -0
  60. package/cpp/ggml-cpu/vec.h +973 -0
  61. package/cpp/ggml-cpu.h +143 -0
  62. package/cpp/ggml-impl.h +525 -168
  63. package/cpp/ggml-metal-impl.h +622 -0
  64. package/cpp/ggml-metal.h +16 -14
  65. package/cpp/ggml-metal.m +5289 -1859
  66. package/cpp/ggml-opt.cpp +1037 -0
  67. package/cpp/ggml-opt.h +237 -0
  68. package/cpp/ggml-quants.c +2916 -6877
  69. package/cpp/ggml-quants.h +87 -249
  70. package/cpp/ggml-threading.cpp +12 -0
  71. package/cpp/ggml-threading.h +14 -0
  72. package/cpp/ggml-whisper-sim.metallib +0 -0
  73. package/cpp/ggml-whisper.metallib +0 -0
  74. package/cpp/ggml.c +3293 -16770
  75. package/cpp/ggml.h +778 -835
  76. package/cpp/gguf.cpp +1347 -0
  77. package/cpp/gguf.h +202 -0
  78. package/cpp/rn-whisper.cpp +84 -0
  79. package/cpp/rn-whisper.h +2 -0
  80. package/cpp/whisper-arch.h +197 -0
  81. package/cpp/whisper.cpp +3240 -944
  82. package/cpp/whisper.h +144 -31
  83. package/ios/CMakeLists.txt +95 -0
  84. package/ios/RNWhisper.h +5 -0
  85. package/ios/RNWhisper.mm +124 -37
  86. package/ios/RNWhisperAudioUtils.h +1 -0
  87. package/ios/RNWhisperAudioUtils.m +24 -13
  88. package/ios/RNWhisperContext.h +8 -2
  89. package/ios/RNWhisperContext.mm +42 -8
  90. package/ios/rnwhisper.xcframework/Info.plist +74 -0
  91. package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml-alloc.h +76 -0
  92. package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml-backend-impl.h +255 -0
  93. package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml-backend.h +354 -0
  94. package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml-common.h +1861 -0
  95. package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml-cpp.h +39 -0
  96. package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml-cpu.h +143 -0
  97. package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml-impl.h +603 -0
  98. package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml-metal-impl.h +622 -0
  99. package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml-metal.h +66 -0
  100. package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml-opt.h +237 -0
  101. package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml-quants.h +100 -0
  102. package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml-threading.h +14 -0
  103. package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml.h +2221 -0
  104. package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/gguf.h +202 -0
  105. package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/rn-audioutils.h +14 -0
  106. package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/rn-whisper-log.h +11 -0
  107. package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/rn-whisper.h +52 -0
  108. package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/whisper-arch.h +197 -0
  109. package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/whisper.h +739 -0
  110. package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Info.plist +0 -0
  111. package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/ggml-whisper.metallib +0 -0
  112. package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/rnwhisper +0 -0
  113. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-alloc.h +76 -0
  114. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-backend-impl.h +255 -0
  115. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-backend.h +354 -0
  116. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-common.h +1861 -0
  117. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-cpp.h +39 -0
  118. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-cpu.h +143 -0
  119. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-impl.h +603 -0
  120. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-metal-impl.h +622 -0
  121. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-metal.h +66 -0
  122. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-opt.h +237 -0
  123. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-quants.h +100 -0
  124. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-threading.h +14 -0
  125. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml.h +2221 -0
  126. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/gguf.h +202 -0
  127. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/rn-audioutils.h +14 -0
  128. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/rn-whisper-log.h +11 -0
  129. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/rn-whisper.h +52 -0
  130. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/whisper-arch.h +197 -0
  131. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/whisper.h +739 -0
  132. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Info.plist +0 -0
  133. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/_CodeSignature/CodeResources +101 -0
  134. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/ggml-whisper-sim.metallib +0 -0
  135. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/rnwhisper +0 -0
  136. package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml-alloc.h +76 -0
  137. package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml-backend-impl.h +255 -0
  138. package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml-backend.h +354 -0
  139. package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml-common.h +1861 -0
  140. package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml-cpp.h +39 -0
  141. package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml-cpu.h +143 -0
  142. package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml-impl.h +603 -0
  143. package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml-metal-impl.h +622 -0
  144. package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml-metal.h +66 -0
  145. package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml-opt.h +237 -0
  146. package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml-quants.h +100 -0
  147. package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml-threading.h +14 -0
  148. package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml.h +2221 -0
  149. package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/gguf.h +202 -0
  150. package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/rn-audioutils.h +14 -0
  151. package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/rn-whisper-log.h +11 -0
  152. package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/rn-whisper.h +52 -0
  153. package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/whisper-arch.h +197 -0
  154. package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/whisper.h +739 -0
  155. package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Info.plist +0 -0
  156. package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/ggml-whisper.metallib +0 -0
  157. package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/rnwhisper +0 -0
  158. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-alloc.h +76 -0
  159. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-backend-impl.h +255 -0
  160. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-backend.h +354 -0
  161. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-common.h +1861 -0
  162. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-cpp.h +39 -0
  163. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-cpu.h +143 -0
  164. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-impl.h +603 -0
  165. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-metal-impl.h +622 -0
  166. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-metal.h +66 -0
  167. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-opt.h +237 -0
  168. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-quants.h +100 -0
  169. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-threading.h +14 -0
  170. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml.h +2221 -0
  171. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/gguf.h +202 -0
  172. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/rn-audioutils.h +14 -0
  173. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/rn-whisper-log.h +11 -0
  174. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/rn-whisper.h +52 -0
  175. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/whisper-arch.h +197 -0
  176. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/whisper.h +739 -0
  177. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Info.plist +0 -0
  178. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/_CodeSignature/CodeResources +101 -0
  179. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/ggml-whisper-sim.metallib +0 -0
  180. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/rnwhisper +0 -0
  181. package/jest/mock.js +14 -1
  182. package/lib/commonjs/NativeRNWhisper.js.map +1 -1
  183. package/lib/commonjs/index.js +48 -19
  184. package/lib/commonjs/index.js.map +1 -1
  185. package/lib/commonjs/version.json +1 -1
  186. package/lib/module/NativeRNWhisper.js.map +1 -1
  187. package/lib/module/index.js +48 -19
  188. package/lib/module/index.js.map +1 -1
  189. package/lib/module/version.json +1 -1
  190. package/lib/typescript/NativeRNWhisper.d.ts +6 -3
  191. package/lib/typescript/NativeRNWhisper.d.ts.map +1 -1
  192. package/lib/typescript/index.d.ts +25 -3
  193. package/lib/typescript/index.d.ts.map +1 -1
  194. package/package.json +15 -10
  195. package/src/NativeRNWhisper.ts +12 -3
  196. package/src/index.ts +63 -24
  197. package/src/version.json +1 -1
  198. package/whisper-rn.podspec +18 -18
  199. package/cpp/README.md +0 -4
  200. package/cpp/ggml-backend.c +0 -1718
  201. package/cpp/ggml-metal-whisper.metal +0 -5820
package/cpp/whisper.h CHANGED
@@ -2,6 +2,7 @@
2
2
  #define WHISPER_H
3
3
 
4
4
  #include "ggml.h"
5
+ #include "ggml-cpu.h"
5
6
 
6
7
  #include <stddef.h>
7
8
  #include <stdint.h>
@@ -84,9 +85,48 @@ extern "C" {
84
85
  typedef int32_t whisper_token;
85
86
  typedef int32_t whisper_seq_id;
86
87
 
88
+ enum whisper_alignment_heads_preset {
89
+ WHISPER_AHEADS_NONE,
90
+ WHISPER_AHEADS_N_TOP_MOST, // All heads from the N-top-most text-layers
91
+ WHISPER_AHEADS_CUSTOM,
92
+ WHISPER_AHEADS_TINY_EN,
93
+ WHISPER_AHEADS_TINY,
94
+ WHISPER_AHEADS_BASE_EN,
95
+ WHISPER_AHEADS_BASE,
96
+ WHISPER_AHEADS_SMALL_EN,
97
+ WHISPER_AHEADS_SMALL,
98
+ WHISPER_AHEADS_MEDIUM_EN,
99
+ WHISPER_AHEADS_MEDIUM,
100
+ WHISPER_AHEADS_LARGE_V1,
101
+ WHISPER_AHEADS_LARGE_V2,
102
+ WHISPER_AHEADS_LARGE_V3,
103
+ WHISPER_AHEADS_LARGE_V3_TURBO,
104
+ };
105
+
106
+ typedef struct whisper_ahead {
107
+ int n_text_layer;
108
+ int n_head;
109
+ } whisper_ahead;
110
+
111
+ typedef struct whisper_aheads {
112
+ size_t n_heads;
113
+ const whisper_ahead * heads;
114
+ } whisper_aheads;
115
+
87
116
  struct whisper_context_params {
88
117
  bool use_gpu;
89
118
  bool use_coreml;
119
+ bool flash_attn;
120
+ int gpu_device; // CUDA device
121
+
122
+ // [EXPERIMENTAL] Token-level timestamps with DTW
123
+ bool dtw_token_timestamps;
124
+ enum whisper_alignment_heads_preset dtw_aheads_preset;
125
+
126
+ int dtw_n_top;
127
+ struct whisper_aheads dtw_aheads;
128
+
129
+ size_t dtw_mem_size; // TODO: remove
90
130
  };
91
131
 
92
132
  typedef struct whisper_token_data {
@@ -103,6 +143,11 @@ extern "C" {
103
143
  int64_t t0; // start time of the token
104
144
  int64_t t1; // end time of the token
105
145
 
146
+ // [EXPERIMENTAL] Token-level timestamps with DTW
147
+ // do not use if you haven't computed token-level timestamps with dtw
148
+ // Roughly corresponds to the moment in audio in which the token was output
149
+ int64_t t_dtw;
150
+
106
151
  float vlen; // voice length of the token
107
152
  } whisper_token_data;
108
153
 
@@ -145,6 +190,15 @@ extern "C" {
145
190
  uint32_t value; // Unicode code point or rule ID
146
191
  } whisper_grammar_element;
147
192
 
193
+ typedef struct whisper_vad_params {
194
+ float threshold; // Probability threshold to consider as speech.
195
+ int min_speech_duration_ms; // Min duration for a valid speech segment.
196
+ int min_silence_duration_ms; // Min silence duration to consider speech as ended.
197
+ float max_speech_duration_s; // Max duration of a speech segment before forcing a new segment.
198
+ int speech_pad_ms; // Padding added before and after speech segments.
199
+ float samples_overlap; // Overlap in seconds when copying audio samples from speech segment.
200
+ } whisper_vad_params;
201
+
148
202
  // Various functions for loading a ggml whisper model.
149
203
  // Allocate (almost) all memory needed for the model.
150
204
  // Return NULL on failure
@@ -196,6 +250,13 @@ extern "C" {
196
250
  // GPU, by caching compiled 'blobs' there.
197
251
  // Set to nullptr if not used.
198
252
  // Returns 0 on success. If OpenVINO is not enabled in build, this simply returns 1.
253
+ WHISPER_API int whisper_ctx_init_openvino_encoder_with_state(
254
+ struct whisper_context * ctx,
255
+ struct whisper_state * state,
256
+ const char * model_path,
257
+ const char * device,
258
+ const char * cache_dir);
259
+
199
260
  WHISPER_API int whisper_ctx_init_openvino_encoder(
200
261
  struct whisper_context * ctx,
201
262
  const char * model_path,
@@ -224,22 +285,6 @@ extern "C" {
224
285
  int n_samples,
225
286
  int n_threads);
226
287
 
227
- // Convert RAW PCM audio to log mel spectrogram but applies a Phase Vocoder to speed up the audio x2.
228
- // The resulting spectrogram is stored inside the default state of the provided whisper context.
229
- // Returns 0 on success
230
- WHISPER_API int whisper_pcm_to_mel_phase_vocoder(
231
- struct whisper_context * ctx,
232
- const float * samples,
233
- int n_samples,
234
- int n_threads);
235
-
236
- WHISPER_API int whisper_pcm_to_mel_phase_vocoder_with_state(
237
- struct whisper_context * ctx,
238
- struct whisper_state * state,
239
- const float * samples,
240
- int n_samples,
241
- int n_threads);
242
-
243
288
  // This can be used to set a custom log mel spectrogram inside the default state of the provided whisper context.
244
289
  // Use this instead of whisper_pcm_to_mel() if you want to provide your own log mel spectrogram.
245
290
  // n_mel must be 80
@@ -296,7 +341,7 @@ extern "C" {
296
341
  // Convert the provided text into tokens.
297
342
  // The tokens pointer must be large enough to hold the resulting tokens.
298
343
  // Returns the number of tokens on success, no more than n_max_tokens
299
- // Returns -1 on failure
344
+ // Returns a negative number on failure - the number of tokens that would have been returned
300
345
  // TODO: not sure if correct
301
346
  WHISPER_API int whisper_tokenize(
302
347
  struct whisper_context * ctx,
@@ -304,8 +349,12 @@ extern "C" {
304
349
  whisper_token * tokens,
305
350
  int n_max_tokens);
306
351
 
352
+ // Return the number of tokens in the provided text
353
+ // Equivalent to: -whisper_tokenize(ctx, text, NULL, 0)
354
+ int whisper_token_count(struct whisper_context * ctx, const char * text);
355
+
307
356
  // Largest language id (i.e. number of available languages - 1)
308
- WHISPER_API int whisper_lang_max_id();
357
+ WHISPER_API int whisper_lang_max_id(void);
309
358
 
310
359
  // Return the id of the specified language, returns -1 if not found
311
360
  // Examples:
@@ -385,6 +434,14 @@ extern "C" {
385
434
  WHISPER_API whisper_token whisper_token_transcribe(struct whisper_context * ctx);
386
435
 
387
436
  // Performance information from the default state.
437
+ struct whisper_timings {
438
+ float sample_ms;
439
+ float encode_ms;
440
+ float decode_ms;
441
+ float batchd_ms;
442
+ float prompt_ms;
443
+ };
444
+ WHISPER_API struct whisper_timings * whisper_get_timings(struct whisper_context * ctx);
388
445
  WHISPER_API void whisper_print_timings(struct whisper_context * ctx);
389
446
  WHISPER_API void whisper_reset_timings(struct whisper_context * ctx);
390
447
 
@@ -412,11 +469,6 @@ extern "C" {
412
469
  // If it returns false, the computation is aborted
413
470
  typedef bool (*whisper_encoder_begin_callback)(struct whisper_context * ctx, struct whisper_state * state, void * user_data);
414
471
 
415
- // Abort callback
416
- // If not NULL, called before ggml computation
417
- // If it returns true, the computation is aborted
418
- typedef bool (*whisper_abort_callback)(void * user_data);
419
-
420
472
  // Logits filter callback
421
473
  // Can be used to modify the logits before sampling
422
474
  // If not NULL, called after applying temperature to logits
@@ -458,15 +510,19 @@ extern "C" {
458
510
 
459
511
  // [EXPERIMENTAL] speed-up techniques
460
512
  // note: these can significantly reduce the quality of the output
461
- bool speed_up; // speed-up the audio by 2x using Phase Vocoder
462
513
  bool debug_mode; // enable debug_mode provides extra info (eg. Dump log_mel)
463
514
  int audio_ctx; // overwrite the audio context size (0 = use default)
464
515
 
465
516
  // [EXPERIMENTAL] [TDRZ] tinydiarize
466
517
  bool tdrz_enable; // enable tinydiarize speaker turn detection
467
518
 
519
+ // A regular expression that matches tokens to suppress
520
+ const char * suppress_regex;
521
+
468
522
  // tokens to provide to the whisper decoder as initial prompt
469
523
  // these are prepended to any existing text context from a previous call
524
+ // use whisper_tokenize() to convert text to tokens
525
+ // maximum of whisper_n_text_ctx()/2 tokens are used (typically 224)
470
526
  const char * initial_prompt;
471
527
  const whisper_token * prompt_tokens;
472
528
  int prompt_n_tokens;
@@ -476,8 +532,8 @@ extern "C" {
476
532
  bool detect_language;
477
533
 
478
534
  // common decoding parameters:
479
- bool suppress_blank; // ref: https://github.com/openai/whisper/blob/f82bc59f5ea234d4b97fb2860842ed38519f7e65/whisper/decoding.py#L89
480
- bool suppress_non_speech_tokens; // ref: https://github.com/openai/whisper/blob/7858aa9c08d98f75575035ecd6481f462d66ca27/whisper/tokenizer.py#L224-L253
535
+ bool suppress_blank; // ref: https://github.com/openai/whisper/blob/f82bc59f5ea234d4b97fb2860842ed38519f7e65/whisper/decoding.py#L89
536
+ bool suppress_nst; // non-speech tokens, ref: https://github.com/openai/whisper/blob/7858aa9c08d98f75575035ecd6481f462d66ca27/whisper/tokenizer.py#L224-L253
481
537
 
482
538
  float temperature; // initial decoding temperature, ref: https://ai.stackexchange.com/a/32478
483
539
  float max_initial_ts; // ref: https://github.com/openai/whisper/blob/f82bc59f5ea234d4b97fb2860842ed38519f7e65/whisper/decoding.py#L97
@@ -488,7 +544,7 @@ extern "C" {
488
544
  float temperature_inc;
489
545
  float entropy_thold; // similar to OpenAI's "compression_ratio_threshold"
490
546
  float logprob_thold;
491
- float no_speech_thold; // TODO: not implemented
547
+ float no_speech_thold;
492
548
 
493
549
  struct {
494
550
  int best_of; // ref: https://github.com/openai/whisper/blob/f82bc59f5ea234d4b97fb2860842ed38519f7e65/whisper/transcribe.py#L264
@@ -513,7 +569,7 @@ extern "C" {
513
569
  void * encoder_begin_callback_user_data;
514
570
 
515
571
  // called each time before ggml computation starts
516
- whisper_abort_callback abort_callback;
572
+ wsp_ggml_abort_callback abort_callback;
517
573
  void * abort_callback_user_data;
518
574
 
519
575
  // called by each decoder to filter obtained logits
@@ -524,13 +580,20 @@ extern "C" {
524
580
  size_t n_grammar_rules;
525
581
  size_t i_start_rule;
526
582
  float grammar_penalty;
583
+
584
+ // Voice Activity Detection (VAD) params
585
+ bool vad; // Enable VAD
586
+ const char * vad_model_path; // Path to VAD model
587
+
588
+ whisper_vad_params vad_params;
527
589
  };
528
590
 
529
591
  // NOTE: this function allocates memory, and it is the responsibility of the caller to free the pointer - see whisper_free_context_params & whisper_free_params()
530
- WHISPER_API struct whisper_context_params * whisper_context_default_params_by_ref();
531
- WHISPER_API struct whisper_context_params whisper_context_default_params(void);
592
+ WHISPER_API struct whisper_context_params * whisper_context_default_params_by_ref(void);
593
+ WHISPER_API struct whisper_context_params whisper_context_default_params (void);
594
+
532
595
  WHISPER_API struct whisper_full_params * whisper_full_default_params_by_ref(enum whisper_sampling_strategy strategy);
533
- WHISPER_API struct whisper_full_params whisper_full_default_params(enum whisper_sampling_strategy strategy);
596
+ WHISPER_API struct whisper_full_params whisper_full_default_params (enum whisper_sampling_strategy strategy);
534
597
 
535
598
  // Run the entire model: PCM -> log mel spectrogram -> encoder -> decoder -> text
536
599
  // Not thread safe for same context
@@ -606,6 +669,53 @@ extern "C" {
606
669
  WHISPER_API float whisper_full_get_token_p (struct whisper_context * ctx, int i_segment, int i_token);
607
670
  WHISPER_API float whisper_full_get_token_p_from_state(struct whisper_state * state, int i_segment, int i_token);
608
671
 
672
+ //
673
+ // Voice Activity Detection (VAD)
674
+ //
675
+
676
+ struct whisper_vad_context;
677
+
678
+ WHISPER_API struct whisper_vad_params whisper_vad_default_params(void);
679
+
680
+ struct whisper_vad_context_params {
681
+ int n_threads; // The number of threads to use for processing.
682
+ bool use_gpu;
683
+ int gpu_device; // CUDA device
684
+ };
685
+
686
+ WHISPER_API struct whisper_vad_context_params whisper_vad_default_context_params(void);
687
+
688
+ WHISPER_API struct whisper_vad_context * whisper_vad_init_from_file_with_params(const char * path_model, struct whisper_vad_context_params params);
689
+ WHISPER_API struct whisper_vad_context * whisper_vad_init_with_params (struct whisper_model_loader * loader, struct whisper_vad_context_params params);
690
+
691
+ WHISPER_API bool whisper_vad_detect_speech(
692
+ struct whisper_vad_context * vctx,
693
+ const float * samples,
694
+ int n_samples);
695
+
696
+ WHISPER_API int whisper_vad_n_probs(struct whisper_vad_context * vctx);
697
+ WHISPER_API float * whisper_vad_probs (struct whisper_vad_context * vctx);
698
+
699
+ struct whisper_vad_segments;
700
+
701
+ WHISPER_API struct whisper_vad_segments * whisper_vad_segments_from_probs(
702
+ struct whisper_vad_context * vctx,
703
+ struct whisper_vad_params params);
704
+
705
+ WHISPER_API struct whisper_vad_segments * whisper_vad_segments_from_samples(
706
+ struct whisper_vad_context * vctx,
707
+ struct whisper_vad_params params,
708
+ const float * samples,
709
+ int n_samples);
710
+
711
+ WHISPER_API int whisper_vad_segments_n_segments(struct whisper_vad_segments * segments);
712
+
713
+ WHISPER_API float whisper_vad_segments_get_segment_t0(struct whisper_vad_segments * segments, int i_segment);
714
+ WHISPER_API float whisper_vad_segments_get_segment_t1(struct whisper_vad_segments * segments, int i_segment);
715
+
716
+ WHISPER_API void whisper_vad_free_segments(struct whisper_vad_segments * segments);
717
+ WHISPER_API void whisper_vad_free (struct whisper_vad_context * ctx);
718
+
609
719
  ////////////////////////////////////////////////////////////////////////////
610
720
 
611
721
  // Temporary helpers needed for exposing ggml interface
@@ -619,6 +729,9 @@ extern "C" {
619
729
 
620
730
  WHISPER_API void whisper_log_set(wsp_ggml_log_callback log_callback, void * user_data);
621
731
 
732
+ // Get the no_speech probability for the specified segment
733
+ WHISPER_API float whisper_full_get_segment_no_speech_prob (struct whisper_context * ctx, int i_segment);
734
+ WHISPER_API float whisper_full_get_segment_no_speech_prob_from_state(struct whisper_state * state, int i_segment);
622
735
  #ifdef __cplusplus
623
736
  }
624
737
  #endif
@@ -0,0 +1,95 @@
1
+ cmake_minimum_required(VERSION 3.16)
2
+ project(rnwhisper VERSION 1.0.0 LANGUAGES CXX C)
3
+
4
+ set(CMAKE_CXX_STANDARD 17)
5
+ set(CMAKE_CXX_STANDARD_REQUIRED ON)
6
+
7
+ # iOS specific settings
8
+ set(CMAKE_OSX_DEPLOYMENT_TARGET 13.0)
9
+ set(CMAKE_XCODE_ATTRIBUTE_ENABLE_BITCODE NO)
10
+
11
+ # Dependencies and compile options
12
+ add_definitions(
13
+ -DNDEBUG
14
+ -DO3
15
+ -DWSP_GGML_USE_CPU
16
+ -DWSP_GGML_USE_ACCELERATE
17
+ -DWSP_GGML_USE_METAL
18
+ -DWSP_GGML_METAL_USE_BF16
19
+ )
20
+
21
+ if (CMAKE_OSX_ARCHITECTURES STREQUAL "arm64;x86_64")
22
+ add_definitions(-DWSP_GGML_CPU_GENERIC)
23
+ endif ()
24
+
25
+ set(SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/../cpp)
26
+
27
+ if (CMAKE_OSX_ARCHITECTURES STREQUAL "arm64")
28
+ set(SOURCE_FILES_ARCH
29
+ ${SOURCE_DIR}/ggml-cpu/arch/arm/quants.c
30
+ ${SOURCE_DIR}/ggml-cpu/arch/arm/repack.cpp
31
+ )
32
+ endif ()
33
+
34
+ # Define public headers
35
+ set(PUBLIC_HEADERS
36
+ ${SOURCE_DIR}/rn-whisper.h
37
+ ${SOURCE_DIR}/whisper.h
38
+ ${SOURCE_DIR}/ggml.h
39
+ )
40
+
41
+ # Create library target
42
+ add_library(rnwhisper SHARED
43
+ ${SOURCE_DIR}/ggml.c
44
+ ${SOURCE_DIR}/ggml-alloc.c
45
+ ${SOURCE_DIR}/ggml-backend.cpp
46
+ ${SOURCE_DIR}/ggml-backend-reg.cpp
47
+ ${SOURCE_DIR}/ggml-cpu/amx/amx.cpp
48
+ ${SOURCE_DIR}/ggml-cpu/amx/mmq.cpp
49
+ ${SOURCE_DIR}/ggml-cpu/ggml-cpu.c
50
+ ${SOURCE_DIR}/ggml-cpu/ggml-cpu.cpp
51
+ ${SOURCE_DIR}/ggml-cpu/quants.c
52
+ ${SOURCE_DIR}/ggml-cpu/traits.cpp
53
+ ${SOURCE_DIR}/ggml-cpu/repack.cpp
54
+ ${SOURCE_DIR}/ggml-cpu/unary-ops.cpp
55
+ ${SOURCE_DIR}/ggml-cpu/binary-ops.cpp
56
+ ${SOURCE_DIR}/ggml-cpu/vec.cpp
57
+ ${SOURCE_DIR}/ggml-cpu/ops.cpp
58
+ ${SOURCE_DIR}/ggml-metal.m
59
+ ${SOURCE_DIR}/ggml-opt.cpp
60
+ ${SOURCE_DIR}/ggml-threading.cpp
61
+ ${SOURCE_DIR}/ggml-quants.c
62
+ ${SOURCE_DIR}/gguf.cpp
63
+ ${SOURCE_DIR}/whisper.cpp
64
+ ${SOURCE_DIR}/rn-whisper.cpp
65
+ ${SOURCE_DIR}/rn-audioutils.cpp
66
+ ${SOURCE_FILES_ARCH}
67
+ )
68
+
69
+ # Setup include directories
70
+ target_include_directories(rnwhisper
71
+ PUBLIC
72
+ $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/../cpp>
73
+ $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/../cpp/ggml-cpu>
74
+ $<INSTALL_INTERFACE:include>
75
+ )
76
+
77
+ # Link required frameworks
78
+ target_link_libraries(rnwhisper PRIVATE
79
+ "-framework Accelerate"
80
+ "-framework Foundation"
81
+ "-framework Metal"
82
+ "-framework MetalKit"
83
+ )
84
+
85
+ # Set properties for framework
86
+ set_target_properties(rnwhisper PROPERTIES
87
+ MACOSX_FRAMEWORK_IDENTIFIER "com.rnwhisper"
88
+ MACOSX_FRAMEWORK_BUNDLE_VERSION 1.0.0
89
+ MACOSX_FRAMEWORK_SHORT_VERSION_STRING 1.0.0
90
+ FRAMEWORK TRUE
91
+ FRAMEWORK_VERSION 1.0.0
92
+ VERSION 1.0.0
93
+ PUBLIC_HEADER "${PUBLIC_HEADERS}"
94
+ XCODE_ATTRIBUTE_CLANG_ENABLE_OBJC_ARC NO
95
+ )
package/ios/RNWhisper.h CHANGED
@@ -1,6 +1,11 @@
1
1
  #ifdef __cplusplus
2
+ #if RNWHISPER_BUILD_FROM_SOURCE
2
3
  #import "whisper.h"
3
4
  #import "rn-whisper.h"
5
+ #else
6
+ #import <rnwhisper/whisper.h>
7
+ #import <rnwhisper/rn-whisper.h>
8
+ #endif
4
9
  #endif
5
10
 
6
11
  #import <React/RCTBridgeModule.h>
package/ios/RNWhisper.mm CHANGED
@@ -50,6 +50,7 @@ RCT_REMAP_METHOD(initContext,
50
50
  BOOL isBundleAsset = [[modelOptions objectForKey:@"isBundleAsset"] boolValue];
51
51
  BOOL useGpu = [[modelOptions objectForKey:@"useGpu"] boolValue];
52
52
  BOOL useCoreMLIos = [[modelOptions objectForKey:@"useCoreMLIos"] boolValue];
53
+ BOOL useFlashAttn = [[modelOptions objectForKey:@"useFlashAttn"] boolValue];
53
54
 
54
55
  // For support debug assets in development mode
55
56
  BOOL downloadCoreMLAssets = [[modelOptions objectForKey:@"downloadCoreMLAssets"] boolValue];
@@ -79,6 +80,7 @@ RCT_REMAP_METHOD(initContext,
79
80
  contextId:contextId
80
81
  noCoreML:!useCoreMLIos
81
82
  noMetal:!useGpu
83
+ useFlashAttn:useFlashAttn
82
84
  ];
83
85
  if ([context getContext] == NULL) {
84
86
  reject(@"whisper_cpp_error", @"Failed to load the model", nil);
@@ -103,42 +105,17 @@ RCT_REMAP_METHOD(initContext,
103
105
  ];
104
106
  }
105
107
 
106
- RCT_REMAP_METHOD(transcribeFile,
107
- withContextId:(int)contextId
108
- withJobId:(int)jobId
109
- withWaveFile:(NSString *)waveFilePath
110
- withOptions:(NSDictionary *)options
111
- withResolver:(RCTPromiseResolveBlock)resolve
112
- withRejecter:(RCTPromiseRejectBlock)reject)
108
+ - (void)transcribeData:(RNWhisperContext *)context
109
+ withContextId:(int)contextId
110
+ withJobId:(int)jobId
111
+ withData:(float *)data
112
+ withDataCount:(int)count
113
+ withOptions:(NSDictionary *)options
114
+ withResolver:(RCTPromiseResolveBlock)resolve
115
+ withRejecter:(RCTPromiseRejectBlock)reject
113
116
  {
114
- RNWhisperContext *context = contexts[[NSNumber numberWithInt:contextId]];
115
-
116
- if (context == nil) {
117
- reject(@"whisper_error", @"Context not found", nil);
118
- return;
119
- }
120
- if ([context isCapturing]) {
121
- reject(@"whisper_error", @"The context is in realtime transcribe mode", nil);
122
- return;
123
- }
124
- if ([context isTranscribing]) {
125
- reject(@"whisper_error", @"Context is already transcribing", nil);
126
- return;
127
- }
128
-
129
- NSString *path = waveFilePath;
130
- if ([path hasPrefix:@"http://"] || [path hasPrefix:@"https://"]) {
131
- path = [RNWhisperDownloader downloadFile:path toFile:nil];
132
- }
133
-
134
- int count = 0;
135
- float *waveFile = [RNWhisperAudioUtils decodeWaveFile:path count:&count];
136
- if (waveFile == nil) {
137
- reject(@"whisper_error", @"Invalid file", nil);
138
- return;
139
- }
140
- [context transcribeFile:jobId
141
- audioData:waveFile
117
+ [context transcribeData:jobId
118
+ audioData:data
142
119
  audioDataCount:count
143
120
  options:options
144
121
  onProgress: ^(int progress) {
@@ -171,11 +148,9 @@ RCT_REMAP_METHOD(transcribeFile,
171
148
  }
172
149
  onEnd: ^(int code) {
173
150
  if (code != 0 && code != 999) {
174
- free(waveFile);
175
151
  reject(@"whisper_cpp_error", [NSString stringWithFormat:@"Failed to transcribe the file. Code: %d", code], nil);
176
152
  return;
177
153
  }
178
- free(waveFile);
179
154
  NSMutableDictionary *result = [context getTextSegments];
180
155
  result[@"isAborted"] = @([context isStoppedByAction]);
181
156
  resolve(result);
@@ -183,6 +158,99 @@ RCT_REMAP_METHOD(transcribeFile,
183
158
  ];
184
159
  }
185
160
 
161
+ RCT_REMAP_METHOD(transcribeFile,
162
+ withContextId:(int)contextId
163
+ withJobId:(int)jobId
164
+ withWaveFile:(NSString *)waveFilePathOrDataBase64
165
+ withOptions:(NSDictionary *)options
166
+ withResolver:(RCTPromiseResolveBlock)resolve
167
+ withRejecter:(RCTPromiseRejectBlock)reject)
168
+ {
169
+ RNWhisperContext *context = contexts[[NSNumber numberWithInt:contextId]];
170
+
171
+ if (context == nil) {
172
+ reject(@"whisper_error", @"Context not found", nil);
173
+ return;
174
+ }
175
+ if ([context isCapturing]) {
176
+ reject(@"whisper_error", @"The context is in realtime transcribe mode", nil);
177
+ return;
178
+ }
179
+ if ([context isTranscribing]) {
180
+ reject(@"whisper_error", @"Context is already transcribing", nil);
181
+ return;
182
+ }
183
+
184
+ float *data = nil;
185
+ int count = 0;
186
+ if ([waveFilePathOrDataBase64 hasPrefix:@"http://"] || [waveFilePathOrDataBase64 hasPrefix:@"https://"]) {
187
+ NSString *path = [RNWhisperDownloader downloadFile:waveFilePathOrDataBase64 toFile:nil];
188
+ data = [RNWhisperAudioUtils decodeWaveFile:path count:&count];
189
+ } else if ([waveFilePathOrDataBase64 hasPrefix:@"data:audio/wav;base64,"]) {
190
+ NSData *waveData = [[NSData alloc] initWithBase64EncodedString:[waveFilePathOrDataBase64 substringFromIndex:22] options:0];
191
+ data = [RNWhisperAudioUtils decodeWaveData:waveData count:&count cutHeader:YES];
192
+ } else {
193
+ data = [RNWhisperAudioUtils decodeWaveFile:waveFilePathOrDataBase64 count:&count];
194
+ }
195
+ if (data == nil) {
196
+ reject(@"whisper_error", @"Invalid file", nil);
197
+ return;
198
+ }
199
+
200
+ [self transcribeData:context
201
+ withContextId:contextId
202
+ withJobId:jobId
203
+ withData:data
204
+ withDataCount:count
205
+ withOptions:options
206
+ withResolver:resolve
207
+ withRejecter:reject
208
+ ];
209
+ }
210
+
211
+ RCT_REMAP_METHOD(transcribeData,
212
+ withContextId:(int)contextId
213
+ withJobId:(int)jobId
214
+ withData:(NSString *)dataBase64 // pcm data base64 encoded
215
+ withOptions:(NSDictionary *)options
216
+ withResolver:(RCTPromiseResolveBlock)resolve
217
+ withRejecter:(RCTPromiseRejectBlock)reject)
218
+ {
219
+ RNWhisperContext *context = contexts[[NSNumber numberWithInt:contextId]];
220
+
221
+ if (context == nil) {
222
+ reject(@"whisper_error", @"Context not found", nil);
223
+ return;
224
+ }
225
+ if ([context isCapturing]) {
226
+ reject(@"whisper_error", @"The context is in realtime transcribe mode", nil);
227
+ return;
228
+ }
229
+ if ([context isTranscribing]) {
230
+ reject(@"whisper_error", @"Context is already transcribing", nil);
231
+ return;
232
+ }
233
+
234
+ NSData *pcmData = [[NSData alloc] initWithBase64EncodedString:dataBase64 options:0];
235
+ int count = 0;
236
+ float *data = [RNWhisperAudioUtils decodeWaveData:pcmData count:&count cutHeader:NO];
237
+
238
+ if (data == nil) {
239
+ reject(@"whisper_error", @"Invalid data", nil);
240
+ return;
241
+ }
242
+
243
+ [self transcribeData:context
244
+ withContextId:contextId
245
+ withJobId:jobId
246
+ withData:data
247
+ withDataCount:count
248
+ withOptions:options
249
+ withResolver:resolve
250
+ withRejecter:reject
251
+ ];
252
+ }
253
+
186
254
  RCT_REMAP_METHOD(startRealtimeTranscribe,
187
255
  withContextId:(int)contextId
188
256
  withJobId:(int)jobId
@@ -244,6 +312,25 @@ RCT_REMAP_METHOD(abortTranscribe,
244
312
  resolve(nil);
245
313
  }
246
314
 
315
+ RCT_REMAP_METHOD(bench,
316
+ withContextId:(int)contextId
317
+ withMaxThreads:(int)maxThreads
318
+ withResolver:(RCTPromiseResolveBlock)resolve
319
+ withRejecter:(RCTPromiseRejectBlock)reject)
320
+ {
321
+ RNWhisperContext *context = contexts[[NSNumber numberWithInt:contextId]];
322
+ if (context == nil) {
323
+ reject(@"whisper_error", @"Context not found", nil);
324
+ return;
325
+ }
326
+ if ([context isTranscribing]) {
327
+ reject(@"whisper_error", @"The context is transcribing", nil);
328
+ return;
329
+ }
330
+ NSString *result = [context bench:maxThreads];
331
+ resolve(result);
332
+ }
333
+
247
334
  RCT_REMAP_METHOD(releaseContext,
248
335
  withContextId:(int)contextId
249
336
  withResolver:(RCTPromiseResolveBlock)resolve
@@ -2,6 +2,7 @@
2
2
 
3
3
  @interface RNWhisperAudioUtils : NSObject
4
4
 
5
+ + (float *)decodeWaveData:(NSData*)data count:(int *)count cutHeader:(BOOL)cutHeader;
5
6
  + (float *)decodeWaveFile:(NSString*)filePath count:(int *)count;
6
7
 
7
8
  @end
@@ -1,27 +1,38 @@
1
1
  #import "RNWhisperAudioUtils.h"
2
+ #if RNWHISPER_BUILD_FROM_SOURCE
2
3
  #import "whisper.h"
4
+ #else
5
+ #import <rnwhisper/whisper.h>
6
+ #endif
3
7
 
4
8
  @implementation RNWhisperAudioUtils
5
9
 
10
+ + (float *)decodeWaveData:(NSData*)data count:(int *)count cutHeader:(BOOL)cutHeader {
11
+ NSData *waveData = data;
12
+ if (cutHeader) {
13
+ // just cut 44 bytes from the beginning
14
+ waveData = [data subdataWithRange:NSMakeRange(44, [data length]-44)];
15
+ }
16
+ const short *shortArray = (const short *)[waveData bytes];
17
+ int shortCount = (int) ([waveData length] / sizeof(short));
18
+ float *floatArray = (float *) malloc(shortCount * sizeof(float));
19
+ for (NSInteger i = 0; i < shortCount; i++) {
20
+ float floatValue = ((float)shortArray[i]) / 32767.0;
21
+ floatValue = MAX(floatValue, -1.0);
22
+ floatValue = MIN(floatValue, 1.0);
23
+ floatArray[i] = floatValue;
24
+ }
25
+ *count = shortCount;
26
+ return floatArray;
27
+ }
28
+
6
29
  + (float *)decodeWaveFile:(NSString*)filePath count:(int *)count {
7
30
  NSURL *url = [NSURL fileURLWithPath:filePath];
8
31
  NSData *fileData = [NSData dataWithContentsOfURL:url];
9
32
  if (fileData == nil) {
10
33
  return nil;
11
34
  }
12
- NSMutableData *waveData = [[NSMutableData alloc] init];
13
- [waveData appendData:[fileData subdataWithRange:NSMakeRange(44, [fileData length]-44)]];
14
- const short *shortArray = (const short *)[waveData bytes];
15
- int shortCount = (int) ([waveData length] / sizeof(short));
16
- float *floatArray = (float *) malloc(shortCount * sizeof(float));
17
- for (NSInteger i = 0; i < shortCount; i++) {
18
- float floatValue = ((float)shortArray[i]) / 32767.0;
19
- floatValue = MAX(floatValue, -1.0);
20
- floatValue = MIN(floatValue, 1.0);
21
- floatArray[i] = floatValue;
22
- }
23
- *count = shortCount;
24
- return floatArray;
35
+ return [RNWhisperAudioUtils decodeWaveData:fileData count:count cutHeader:YES];
25
36
  }
26
37
 
27
38
  @end