whisper.rn 0.4.0-rc.9 → 0.4.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (202) hide show
  1. package/README.md +74 -1
  2. package/android/build.gradle +12 -3
  3. package/android/src/main/CMakeLists.txt +43 -13
  4. package/android/src/main/java/com/rnwhisper/RNWhisper.java +211 -0
  5. package/android/src/main/java/com/rnwhisper/WhisperContext.java +64 -36
  6. package/android/src/main/java/com/rnwhisper/WhisperVadContext.java +157 -0
  7. package/android/src/main/jni.cpp +205 -0
  8. package/android/src/main/jniLibs/arm64-v8a/librnwhisper.so +0 -0
  9. package/android/src/main/jniLibs/arm64-v8a/librnwhisper_v8fp16_va_2.so +0 -0
  10. package/android/src/main/jniLibs/armeabi-v7a/librnwhisper.so +0 -0
  11. package/android/src/main/jniLibs/armeabi-v7a/librnwhisper_vfpv4.so +0 -0
  12. package/android/src/main/jniLibs/x86_64/librnwhisper.so +0 -0
  13. package/android/src/main/jniLibs/x86_64/librnwhisper_x86_64.so +0 -0
  14. package/android/src/newarch/java/com/rnwhisper/RNWhisperModule.java +26 -0
  15. package/android/src/oldarch/java/com/rnwhisper/RNWhisperModule.java +26 -0
  16. package/cpp/coreml/whisper-compat.h +10 -0
  17. package/cpp/coreml/whisper-compat.m +35 -0
  18. package/cpp/coreml/whisper-decoder-impl.h +27 -15
  19. package/cpp/coreml/whisper-decoder-impl.m +36 -10
  20. package/cpp/coreml/whisper-encoder-impl.h +21 -9
  21. package/cpp/coreml/whisper-encoder-impl.m +29 -3
  22. package/cpp/ggml-alloc.c +39 -37
  23. package/cpp/ggml-alloc.h +1 -1
  24. package/cpp/ggml-backend-impl.h +55 -27
  25. package/cpp/ggml-backend-reg.cpp +591 -0
  26. package/cpp/ggml-backend.cpp +336 -955
  27. package/cpp/ggml-backend.h +70 -42
  28. package/cpp/ggml-common.h +57 -49
  29. package/cpp/ggml-cpp.h +39 -0
  30. package/cpp/ggml-cpu/amx/amx.cpp +221 -0
  31. package/cpp/ggml-cpu/amx/amx.h +8 -0
  32. package/cpp/ggml-cpu/amx/common.h +91 -0
  33. package/cpp/ggml-cpu/amx/mmq.cpp +2511 -0
  34. package/cpp/ggml-cpu/amx/mmq.h +10 -0
  35. package/cpp/ggml-cpu/arch/arm/cpu-feats.cpp +94 -0
  36. package/cpp/ggml-cpu/arch/arm/quants.c +4113 -0
  37. package/cpp/ggml-cpu/arch/arm/repack.cpp +2162 -0
  38. package/cpp/ggml-cpu/arch/x86/cpu-feats.cpp +327 -0
  39. package/cpp/ggml-cpu/arch/x86/quants.c +4310 -0
  40. package/cpp/ggml-cpu/arch/x86/repack.cpp +3284 -0
  41. package/cpp/ggml-cpu/arch-fallback.h +184 -0
  42. package/cpp/ggml-cpu/binary-ops.cpp +158 -0
  43. package/cpp/ggml-cpu/binary-ops.h +16 -0
  44. package/cpp/ggml-cpu/common.h +72 -0
  45. package/cpp/ggml-cpu/ggml-cpu-impl.h +511 -0
  46. package/cpp/ggml-cpu/ggml-cpu.c +3473 -0
  47. package/cpp/ggml-cpu/ggml-cpu.cpp +671 -0
  48. package/cpp/ggml-cpu/ops.cpp +9085 -0
  49. package/cpp/ggml-cpu/ops.h +111 -0
  50. package/cpp/ggml-cpu/quants.c +1157 -0
  51. package/cpp/ggml-cpu/quants.h +89 -0
  52. package/cpp/ggml-cpu/repack.cpp +1570 -0
  53. package/cpp/ggml-cpu/repack.h +98 -0
  54. package/cpp/ggml-cpu/simd-mappings.h +1006 -0
  55. package/cpp/ggml-cpu/traits.cpp +36 -0
  56. package/cpp/ggml-cpu/traits.h +38 -0
  57. package/cpp/ggml-cpu/unary-ops.cpp +186 -0
  58. package/cpp/ggml-cpu/unary-ops.h +28 -0
  59. package/cpp/ggml-cpu/vec.cpp +321 -0
  60. package/cpp/ggml-cpu/vec.h +973 -0
  61. package/cpp/ggml-cpu.h +143 -0
  62. package/cpp/ggml-impl.h +417 -23
  63. package/cpp/ggml-metal-impl.h +622 -0
  64. package/cpp/ggml-metal.h +9 -9
  65. package/cpp/ggml-metal.m +3451 -1344
  66. package/cpp/ggml-opt.cpp +1037 -0
  67. package/cpp/ggml-opt.h +237 -0
  68. package/cpp/ggml-quants.c +296 -10818
  69. package/cpp/ggml-quants.h +78 -125
  70. package/cpp/ggml-threading.cpp +12 -0
  71. package/cpp/ggml-threading.h +14 -0
  72. package/cpp/ggml-whisper-sim.metallib +0 -0
  73. package/cpp/ggml-whisper.metallib +0 -0
  74. package/cpp/ggml.c +4633 -21450
  75. package/cpp/ggml.h +320 -661
  76. package/cpp/gguf.cpp +1347 -0
  77. package/cpp/gguf.h +202 -0
  78. package/cpp/rn-whisper.cpp +4 -11
  79. package/cpp/whisper-arch.h +197 -0
  80. package/cpp/whisper.cpp +2022 -495
  81. package/cpp/whisper.h +75 -18
  82. package/ios/CMakeLists.txt +95 -0
  83. package/ios/RNWhisper.h +5 -0
  84. package/ios/RNWhisper.mm +147 -0
  85. package/ios/RNWhisperAudioUtils.m +4 -0
  86. package/ios/RNWhisperContext.h +5 -0
  87. package/ios/RNWhisperContext.mm +22 -26
  88. package/ios/RNWhisperVadContext.h +29 -0
  89. package/ios/RNWhisperVadContext.mm +152 -0
  90. package/ios/rnwhisper.xcframework/Info.plist +74 -0
  91. package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml-alloc.h +76 -0
  92. package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml-backend-impl.h +255 -0
  93. package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml-backend.h +354 -0
  94. package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml-common.h +1861 -0
  95. package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml-cpp.h +39 -0
  96. package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml-cpu.h +143 -0
  97. package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml-impl.h +603 -0
  98. package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml-metal-impl.h +622 -0
  99. package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml-metal.h +66 -0
  100. package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml-opt.h +237 -0
  101. package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml-quants.h +100 -0
  102. package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml-threading.h +14 -0
  103. package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml.h +2221 -0
  104. package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/gguf.h +202 -0
  105. package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/rn-audioutils.h +14 -0
  106. package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/rn-whisper-log.h +11 -0
  107. package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/rn-whisper.h +52 -0
  108. package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/whisper-arch.h +197 -0
  109. package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/whisper.h +739 -0
  110. package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Info.plist +0 -0
  111. package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/ggml-whisper.metallib +0 -0
  112. package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/rnwhisper +0 -0
  113. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-alloc.h +76 -0
  114. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-backend-impl.h +255 -0
  115. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-backend.h +354 -0
  116. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-common.h +1861 -0
  117. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-cpp.h +39 -0
  118. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-cpu.h +143 -0
  119. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-impl.h +603 -0
  120. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-metal-impl.h +622 -0
  121. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-metal.h +66 -0
  122. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-opt.h +237 -0
  123. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-quants.h +100 -0
  124. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-threading.h +14 -0
  125. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml.h +2221 -0
  126. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/gguf.h +202 -0
  127. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/rn-audioutils.h +14 -0
  128. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/rn-whisper-log.h +11 -0
  129. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/rn-whisper.h +52 -0
  130. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/whisper-arch.h +197 -0
  131. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/whisper.h +739 -0
  132. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Info.plist +0 -0
  133. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/_CodeSignature/CodeResources +101 -0
  134. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/ggml-whisper-sim.metallib +0 -0
  135. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/rnwhisper +0 -0
  136. package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml-alloc.h +76 -0
  137. package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml-backend-impl.h +255 -0
  138. package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml-backend.h +354 -0
  139. package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml-common.h +1861 -0
  140. package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml-cpp.h +39 -0
  141. package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml-cpu.h +143 -0
  142. package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml-impl.h +603 -0
  143. package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml-metal-impl.h +622 -0
  144. package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml-metal.h +66 -0
  145. package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml-opt.h +237 -0
  146. package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml-quants.h +100 -0
  147. package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml-threading.h +14 -0
  148. package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml.h +2221 -0
  149. package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/gguf.h +202 -0
  150. package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/rn-audioutils.h +14 -0
  151. package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/rn-whisper-log.h +11 -0
  152. package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/rn-whisper.h +52 -0
  153. package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/whisper-arch.h +197 -0
  154. package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/whisper.h +739 -0
  155. package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Info.plist +0 -0
  156. package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/ggml-whisper.metallib +0 -0
  157. package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/rnwhisper +0 -0
  158. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-alloc.h +76 -0
  159. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-backend-impl.h +255 -0
  160. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-backend.h +354 -0
  161. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-common.h +1861 -0
  162. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-cpp.h +39 -0
  163. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-cpu.h +143 -0
  164. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-impl.h +603 -0
  165. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-metal-impl.h +622 -0
  166. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-metal.h +66 -0
  167. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-opt.h +237 -0
  168. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-quants.h +100 -0
  169. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-threading.h +14 -0
  170. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml.h +2221 -0
  171. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/gguf.h +202 -0
  172. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/rn-audioutils.h +14 -0
  173. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/rn-whisper-log.h +11 -0
  174. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/rn-whisper.h +52 -0
  175. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/whisper-arch.h +197 -0
  176. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/whisper.h +739 -0
  177. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Info.plist +0 -0
  178. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/_CodeSignature/CodeResources +101 -0
  179. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/ggml-whisper-sim.metallib +0 -0
  180. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/rnwhisper +0 -0
  181. package/jest/mock.js +24 -0
  182. package/lib/commonjs/NativeRNWhisper.js.map +1 -1
  183. package/lib/commonjs/index.js +111 -1
  184. package/lib/commonjs/index.js.map +1 -1
  185. package/lib/commonjs/version.json +1 -1
  186. package/lib/module/NativeRNWhisper.js.map +1 -1
  187. package/lib/module/index.js +112 -0
  188. package/lib/module/index.js.map +1 -1
  189. package/lib/module/version.json +1 -1
  190. package/lib/typescript/NativeRNWhisper.d.ts +35 -0
  191. package/lib/typescript/NativeRNWhisper.d.ts.map +1 -1
  192. package/lib/typescript/index.d.ts +39 -3
  193. package/lib/typescript/index.d.ts.map +1 -1
  194. package/package.json +10 -6
  195. package/src/NativeRNWhisper.ts +48 -0
  196. package/src/index.ts +132 -1
  197. package/src/version.json +1 -1
  198. package/whisper-rn.podspec +11 -18
  199. package/cpp/README.md +0 -4
  200. package/cpp/ggml-aarch64.c +0 -3209
  201. package/cpp/ggml-aarch64.h +0 -39
  202. package/cpp/ggml-cpu-impl.h +0 -614
package/README.md CHANGED
@@ -25,6 +25,8 @@ npm install whisper.rn
25
25
 
26
26
  Please re-run `npx pod-install` again.
27
27
 
28
+ By default, `whisper.rn` will use pre-built `rnwhisper.xcframework` for iOS. If you want to build from source, please set `RNWHISPER_BUILD_FROM_SOURCE` to `1` in your Podfile.
29
+
28
30
  If you want to use the `medium` or `large` model, it is recommended to enable the [Extended Virtual Addressing](https://developer.apple.com/documentation/bundleresources/entitlements/com_apple_developer_kernel_extended-virtual-addressing) capability in your iOS project.
29
31
 
30
32
  #### Android
@@ -36,7 +38,9 @@ Add proguard rule if it's enabled in project (android/app/proguard-rules.pro):
36
38
  -keep class com.rnwhisper.** { *; }
37
39
  ```
38
40
 
39
- For build, it's recommended to use `ndkVersion = "24.0.8215888"` (or above) in your root project build configuration for Apple Silicon Macs. Otherwise please follow this trobleshooting [issue](./TROUBLESHOOTING.md#android-got-build-error-unknown-host-cpu-architecture-arm64-on-apple-silicon-macs).
41
+ By default, `whisper.rn` will use pre-built libraries for Android. If you want to build from source, please set `rnwhisperBuildFromSource` to `true` in `android/gradle.properties`.
42
+
43
+ When building from source, it's recommended to use `ndkVersion = "24.0.8215888"` (or above) in your root project build configuration on Apple Silicon Macs. Otherwise, please follow this troubleshooting [issue](./TROUBLESHOOTING.md#android-got-build-error-unknown-host-cpu-architecture-arm64-on-apple-silicon-macs).
40
44
 
41
45
  #### Expo
42
46
 
@@ -99,6 +103,75 @@ subscribe(evt => {
99
103
  })
100
104
  ```
101
105
 
106
+ ## Voice Activity Detection (VAD)
107
+
108
+ Voice Activity Detection allows you to detect speech segments in audio data using the Silero VAD model.
109
+
110
+ #### Initialize VAD Context
111
+
112
+ ```typescript
113
+ import { initWhisperVad } from 'whisper.rn'
114
+
115
+ const vadContext = await initWhisperVad({
116
+ filePath: require('./assets/ggml-silero-v5.1.2.bin'), // VAD model file
117
+ useGpu: true, // Use GPU acceleration (iOS only)
118
+ nThreads: 4, // Number of threads for processing
119
+ })
120
+ ```
121
+
122
+ #### Detect Speech Segments
123
+
124
+ ##### From Audio Files
125
+
126
+ ```typescript
127
+ // Detect speech in audio file (supports same formats as transcribe)
128
+ const segments = await vadContext.detectSpeech(require('./assets/audio.wav'), {
129
+ threshold: 0.5, // Speech probability threshold (0.0-1.0)
130
+ minSpeechDurationMs: 250, // Minimum speech duration in ms
131
+ minSilenceDurationMs: 100, // Minimum silence duration in ms
132
+ maxSpeechDurationS: 30, // Maximum speech duration in seconds
133
+ speechPadMs: 30, // Padding around speech segments in ms
134
+ samplesOverlap: 0.1, // Overlap between analysis windows
135
+ })
136
+
137
+ // Also supports:
138
+ // - File paths: vadContext.detectSpeech('path/to/audio.wav', options)
139
+ // - HTTP URLs: vadContext.detectSpeech('https://example.com/audio.wav', options)
140
+ // - Base64 WAV: vadContext.detectSpeech('data:audio/wav;base64,...', options)
141
+ // - Assets: vadContext.detectSpeech(require('./assets/audio.wav'), options)
142
+ ```
143
+
144
+ ##### From Raw Audio Data
145
+
146
+ ```typescript
147
+ // Detect speech in base64 encoded float32 PCM data
148
+ const segments = await vadContext.detectSpeechData(base64AudioData, {
149
+ threshold: 0.5,
150
+ minSpeechDurationMs: 250,
151
+ minSilenceDurationMs: 100,
152
+ maxSpeechDurationS: 30,
153
+ speechPadMs: 30,
154
+ samplesOverlap: 0.1,
155
+ })
156
+ ```
157
+
158
+ #### Process Results
159
+
160
+ ```typescript
161
+ segments.forEach((segment, index) => {
162
+ console.log(`Segment ${index + 1}: ${segment.t0.toFixed(2)}s - ${segment.t1.toFixed(2)}s`)
163
+ console.log(`Duration: ${(segment.t1 - segment.t0).toFixed(2)}s`)
164
+ })
165
+ ```
166
+
167
+ #### Release VAD Context
168
+
169
+ ```typescript
170
+ await vadContext.release()
171
+ // Or release all VAD contexts
172
+ await releaseAllWhisperVad()
173
+ ```
174
+
102
175
  On iOS, you may need to change the Audio Session so that it can be used alongside other audio playback, or to optimize the quality of the recording. We provide AudioSession utilities for this:
103
176
 
104
177
  Option 1 - Use options in transcribeRealtime:
@@ -53,9 +53,18 @@ android {
53
53
  }
54
54
  }
55
55
  }
56
- externalNativeBuild {
57
- cmake {
58
- path = file('src/main/CMakeLists.txt')
56
+ def rnwhisperBuildFromSource = project.properties["rnwhisperBuildFromSource"]
57
+ if (rnwhisperBuildFromSource == "true") {
58
+ externalNativeBuild {
59
+ cmake {
60
+ path = file('src/main/CMakeLists.txt')
61
+ }
62
+ }
63
+ // Exclude jniLibs
64
+ sourceSets {
65
+ main {
66
+ jniLibs.srcDirs = []
67
+ }
59
68
  }
60
69
  }
61
70
  buildTypes {
@@ -2,16 +2,35 @@ cmake_minimum_required(VERSION 3.10)
2
2
 
3
3
  project(whisper.rn)
4
4
 
5
- set(CMAKE_CXX_STANDARD 11)
5
+ set(CMAKE_CXX_STANDARD 17)
6
6
  set(RNWHISPER_LIB_DIR ${CMAKE_SOURCE_DIR}/../../../cpp)
7
7
 
8
+ include_directories(
9
+ ${RNWHISPER_LIB_DIR}
10
+ ${RNWHISPER_LIB_DIR}/ggml-cpu
11
+ )
12
+
8
13
  set(
9
14
  SOURCE_FILES
10
15
  ${RNWHISPER_LIB_DIR}/ggml.c
11
16
  ${RNWHISPER_LIB_DIR}/ggml-alloc.c
12
17
  ${RNWHISPER_LIB_DIR}/ggml-backend.cpp
18
+ ${RNWHISPER_LIB_DIR}/ggml-backend-reg.cpp
19
+ ${RNWHISPER_LIB_DIR}/ggml-cpu/amx/amx.cpp
20
+ ${RNWHISPER_LIB_DIR}/ggml-cpu/amx/mmq.cpp
21
+ ${RNWHISPER_LIB_DIR}/ggml-cpu/ggml-cpu.c
22
+ ${RNWHISPER_LIB_DIR}/ggml-cpu/ggml-cpu.cpp
23
+ ${RNWHISPER_LIB_DIR}/ggml-cpu/quants.c
24
+ ${RNWHISPER_LIB_DIR}/ggml-cpu/traits.cpp
25
+ ${RNWHISPER_LIB_DIR}/ggml-cpu/repack.cpp
26
+ ${RNWHISPER_LIB_DIR}/ggml-cpu/unary-ops.cpp
27
+ ${RNWHISPER_LIB_DIR}/ggml-cpu/binary-ops.cpp
28
+ ${RNWHISPER_LIB_DIR}/ggml-cpu/vec.cpp
29
+ ${RNWHISPER_LIB_DIR}/ggml-cpu/ops.cpp
30
+ ${RNWHISPER_LIB_DIR}/ggml-opt.cpp
31
+ ${RNWHISPER_LIB_DIR}/ggml-threading.cpp
13
32
  ${RNWHISPER_LIB_DIR}/ggml-quants.c
14
- ${RNWHISPER_LIB_DIR}/ggml-aarch64.c
33
+ ${RNWHISPER_LIB_DIR}/gguf.cpp
15
34
  ${RNWHISPER_LIB_DIR}/whisper.cpp
16
35
  ${RNWHISPER_LIB_DIR}/rn-audioutils.cpp
17
36
  ${RNWHISPER_LIB_DIR}/rn-whisper.cpp
@@ -20,45 +39,56 @@ set(
20
39
 
21
40
  find_library(LOG_LIB log)
22
41
 
23
- function(build_library target_name)
42
+ function(build_library target_name arch cpu_flags)
43
+ if (NOT ${arch} STREQUAL "generic")
44
+ set(SOURCE_FILES_ARCH
45
+ ${RNWHISPER_LIB_DIR}/ggml-cpu/arch/${arch}/quants.c
46
+ ${RNWHISPER_LIB_DIR}/ggml-cpu/arch/${arch}/repack.cpp
47
+ )
48
+ endif ()
49
+
24
50
  add_library(
25
51
  ${target_name}
26
52
  SHARED
27
53
  ${SOURCE_FILES}
54
+ ${SOURCE_FILES_ARCH}
28
55
  )
29
56
 
30
57
  target_link_libraries(${target_name} ${LOG_LIB} android)
31
58
 
32
- if (${target_name} STREQUAL "whisper_v8fp16_va")
33
- target_compile_options(${target_name} PRIVATE -march=armv8.2-a+fp16)
34
- elseif (${target_name} STREQUAL "whisper_vfpv4")
35
- target_compile_options(${target_name} PRIVATE -mfpu=neon-vfpv4)
59
+ if (${arch} STREQUAL "generic")
60
+ target_compile_options(${target_name} PRIVATE -DWSP_GGML_CPU_GENERIC)
36
61
  endif ()
37
62
 
63
+ target_compile_options(${target_name} PRIVATE -DWSP_GGML_USE_CPU -DWSP_GGML_USE_CPU_REPACK -pthread ${cpu_flags})
64
+
38
65
  if (${CMAKE_BUILD_TYPE} STREQUAL "Debug")
39
66
  target_compile_options(${target_name} PRIVATE -DRNWHISPER_ANDROID_ENABLE_LOGGING)
40
67
  endif ()
41
68
 
42
69
  # NOTE: If you want to debug the native code, you can uncomment if and endif
70
+ # Note that it will be extremely slow
43
71
  # if (NOT ${CMAKE_BUILD_TYPE} STREQUAL "Debug")
44
-
45
- target_compile_options(${target_name} PRIVATE -O3 -DNDEBUG -pthread)
72
+ target_compile_options(${target_name} PRIVATE -O3 -DNDEBUG)
46
73
  target_compile_options(${target_name} PRIVATE -fvisibility=hidden -fvisibility-inlines-hidden)
47
74
  target_compile_options(${target_name} PRIVATE -ffunction-sections -fdata-sections)
48
75
 
49
76
  target_link_options(${target_name} PRIVATE -Wl,--gc-sections)
50
77
  target_link_options(${target_name} PRIVATE -Wl,--exclude-libs,ALL)
51
78
  target_link_options(${target_name} PRIVATE -flto)
52
-
53
79
  # endif ()
54
80
  endfunction()
55
81
 
56
- build_library("whisper") # Default target
82
+ build_library("rnwhisper" "generic" "")
57
83
 
58
84
  if (${ANDROID_ABI} STREQUAL "arm64-v8a")
59
- build_library("whisper_v8fp16_va")
85
+ build_library("rnwhisper_v8fp16_va_2" "arm" "-march=armv8.2-a+fp16")
60
86
  elseif (${ANDROID_ABI} STREQUAL "armeabi-v7a")
61
- build_library("whisper_vfpv4")
87
+ build_library("rnwhisper_vfpv4" "arm" "-mfpu=neon-vfpv4")
88
+ elseif (${ANDROID_ABI} STREQUAL "x86_64")
89
+ # x86_64 target
90
+ build_library("rnwhisper_x86_64" "x86" "-march=x86-64" "-mtune=intel" "-msse4.2" "-mpopcnt")
62
91
  endif ()
63
92
 
93
+
64
94
  include_directories(${RNWHISPER_LIB_DIR})
@@ -13,6 +13,7 @@ import com.facebook.react.bridge.ReactMethod;
13
13
  import com.facebook.react.bridge.LifecycleEventListener;
14
14
  import com.facebook.react.bridge.ReadableMap;
15
15
  import com.facebook.react.bridge.WritableMap;
16
+ import com.facebook.react.bridge.WritableArray;
16
17
  import com.facebook.react.bridge.Arguments;
17
18
 
18
19
  import java.util.HashMap;
@@ -47,6 +48,7 @@ public class RNWhisper implements LifecycleEventListener {
47
48
  private HashMap<AsyncTask, String> tasks = new HashMap<>();
48
49
 
49
50
  private HashMap<Integer, WhisperContext> contexts = new HashMap<>();
51
+ private HashMap<Integer, WhisperVadContext> vadContexts = new HashMap<>();
50
52
 
51
53
  private int getResourceIdentifier(String filePath) {
52
54
  int identifier = reactContext.getResources().getIdentifier(
@@ -344,6 +346,211 @@ public class RNWhisper implements LifecycleEventListener {
344
346
  tasks.put(task, "releaseAllContexts");
345
347
  }
346
348
 
349
+ public void initVadContext(final ReadableMap options, final Promise promise) {
350
+ AsyncTask task = new AsyncTask<Void, Void, Integer>() {
351
+ private Exception exception;
352
+
353
+ @Override
354
+ protected Integer doInBackground(Void... voids) {
355
+ try {
356
+ String modelPath = options.getString("filePath");
357
+ boolean isBundleAsset = options.getBoolean("isBundleAsset");
358
+
359
+ String modelFilePath = modelPath;
360
+ if (!isBundleAsset && (modelPath.startsWith("http://") || modelPath.startsWith("https://"))) {
361
+ modelFilePath = downloader.downloadFile(modelPath);
362
+ }
363
+
364
+ long vadContext;
365
+ int resId = getResourceIdentifier(modelFilePath);
366
+ if (resId > 0) {
367
+ vadContext = WhisperContext.initVadContextWithInputStream(
368
+ new PushbackInputStream(reactContext.getResources().openRawResource(resId))
369
+ );
370
+ } else if (isBundleAsset) {
371
+ vadContext = WhisperContext.initVadContextWithAsset(reactContext.getAssets(), modelFilePath);
372
+ } else {
373
+ vadContext = WhisperContext.initVadContext(modelFilePath);
374
+ }
375
+ if (vadContext == 0) {
376
+ throw new Exception("Failed to initialize VAD context");
377
+ }
378
+ int id = Math.abs(new Random().nextInt());
379
+ WhisperVadContext whisperVadContext = new WhisperVadContext(id, reactContext, vadContext);
380
+ vadContexts.put(id, whisperVadContext);
381
+ return id;
382
+ } catch (Exception e) {
383
+ exception = e;
384
+ return null;
385
+ }
386
+ }
387
+
388
+ @Override
389
+ protected void onPostExecute(Integer id) {
390
+ if (exception != null) {
391
+ promise.reject(exception);
392
+ return;
393
+ }
394
+ WritableMap result = Arguments.createMap();
395
+ result.putInt("contextId", id);
396
+ result.putBoolean("gpu", false);
397
+ result.putString("reasonNoGPU", "Currently not supported");
398
+ promise.resolve(result);
399
+ tasks.remove(this);
400
+ }
401
+ }.executeOnExecutor(AsyncTask.THREAD_POOL_EXECUTOR);
402
+ tasks.put(task, "initVadContext");
403
+ }
404
+
405
+ public void vadDetectSpeech(double id, String audioDataBase64, ReadableMap options, Promise promise) {
406
+ final WhisperVadContext vadContext = vadContexts.get((int) id);
407
+ if (vadContext == null) {
408
+ promise.reject("VAD context not found");
409
+ return;
410
+ }
411
+
412
+ AsyncTask task = new AsyncTask<Void, Void, WritableArray>() {
413
+ private Exception exception;
414
+
415
+ @Override
416
+ protected WritableArray doInBackground(Void... voids) {
417
+ try {
418
+ return vadContext.detectSpeech(audioDataBase64, options);
419
+ } catch (Exception e) {
420
+ exception = e;
421
+ return null;
422
+ }
423
+ }
424
+
425
+ @Override
426
+ protected void onPostExecute(WritableArray segments) {
427
+ if (exception != null) {
428
+ promise.reject(exception);
429
+ return;
430
+ }
431
+ promise.resolve(segments);
432
+ tasks.remove(this);
433
+ }
434
+ }.executeOnExecutor(AsyncTask.THREAD_POOL_EXECUTOR);
435
+ tasks.put(task, "vadDetectSpeech-" + id);
436
+ }
437
+
438
+ public void vadDetectSpeechFile(double id, String filePathOrBase64, ReadableMap options, Promise promise) {
439
+ final WhisperVadContext vadContext = vadContexts.get((int) id);
440
+ if (vadContext == null) {
441
+ promise.reject("VAD context not found");
442
+ return;
443
+ }
444
+
445
+ AsyncTask task = new AsyncTask<Void, Void, WritableArray>() {
446
+ private Exception exception;
447
+
448
+ @Override
449
+ protected WritableArray doInBackground(Void... voids) {
450
+ try {
451
+ // Handle file processing like transcribeFile does
452
+ String filePath = filePathOrBase64;
453
+ if (filePathOrBase64.startsWith("http://") || filePathOrBase64.startsWith("https://")) {
454
+ filePath = downloader.downloadFile(filePathOrBase64);
455
+ }
456
+
457
+ float[] audioData;
458
+ int resId = getResourceIdentifier(filePath);
459
+ if (resId > 0) {
460
+ audioData = AudioUtils.decodeWaveFile(reactContext.getResources().openRawResource(resId));
461
+ } else if (filePathOrBase64.startsWith("data:audio/wav;base64,")) {
462
+ audioData = AudioUtils.decodeWaveData(filePathOrBase64);
463
+ } else {
464
+ audioData = AudioUtils.decodeWaveFile(new java.io.FileInputStream(new java.io.File(filePath)));
465
+ }
466
+
467
+ if (audioData == null) {
468
+ throw new Exception("Failed to load audio file: " + filePathOrBase64);
469
+ }
470
+
471
+ return vadContext.detectSpeechWithAudioData(audioData, options);
472
+ } catch (Exception e) {
473
+ exception = e;
474
+ return null;
475
+ }
476
+ }
477
+
478
+ @Override
479
+ protected void onPostExecute(WritableArray segments) {
480
+ if (exception != null) {
481
+ promise.reject(exception);
482
+ return;
483
+ }
484
+ promise.resolve(segments);
485
+ tasks.remove(this);
486
+ }
487
+ }.executeOnExecutor(AsyncTask.THREAD_POOL_EXECUTOR);
488
+ tasks.put(task, "vadDetectSpeechFile-" + id);
489
+ }
490
+
491
+ public void releaseVadContext(double id, Promise promise) {
492
+ final int contextId = (int) id;
493
+ AsyncTask task = new AsyncTask<Void, Void, Void>() {
494
+ private Exception exception;
495
+
496
+ @Override
497
+ protected Void doInBackground(Void... voids) {
498
+ try {
499
+ WhisperVadContext vadContext = vadContexts.get(contextId);
500
+ if (vadContext == null) {
501
+ throw new Exception("VAD context " + id + " not found");
502
+ }
503
+ vadContext.release();
504
+ vadContexts.remove(contextId);
505
+ } catch (Exception e) {
506
+ exception = e;
507
+ }
508
+ return null;
509
+ }
510
+
511
+ @Override
512
+ protected void onPostExecute(Void result) {
513
+ if (exception != null) {
514
+ promise.reject(exception);
515
+ return;
516
+ }
517
+ promise.resolve(null);
518
+ tasks.remove(this);
519
+ }
520
+ }.executeOnExecutor(AsyncTask.THREAD_POOL_EXECUTOR);
521
+ tasks.put(task, "releaseVadContext-" + id);
522
+ }
523
+
524
+ public void releaseAllVadContexts(Promise promise) {
525
+ AsyncTask task = new AsyncTask<Void, Void, Void>() {
526
+ private Exception exception;
527
+
528
+ @Override
529
+ protected Void doInBackground(Void... voids) {
530
+ try {
531
+ for (WhisperVadContext vadContext : vadContexts.values()) {
532
+ vadContext.release();
533
+ }
534
+ vadContexts.clear();
535
+ } catch (Exception e) {
536
+ exception = e;
537
+ }
538
+ return null;
539
+ }
540
+
541
+ @Override
542
+ protected void onPostExecute(Void result) {
543
+ if (exception != null) {
544
+ promise.reject(exception);
545
+ return;
546
+ }
547
+ promise.resolve(null);
548
+ tasks.remove(this);
549
+ }
550
+ }.executeOnExecutor(AsyncTask.THREAD_POOL_EXECUTOR);
551
+ tasks.put(task, "releaseAllVadContexts");
552
+ }
553
+
347
554
  @Override
348
555
  public void onHostResume() {
349
556
  }
@@ -367,8 +574,12 @@ public class RNWhisper implements LifecycleEventListener {
367
574
  for (WhisperContext context : contexts.values()) {
368
575
  context.release();
369
576
  }
577
+ for (WhisperVadContext vadContext : vadContexts.values()) {
578
+ vadContext.release();
579
+ }
370
580
  WhisperContext.abortAllTranscribe(); // graceful abort
371
581
  contexts.clear();
582
+ vadContexts.clear();
372
583
  downloader.clearCache();
373
584
  }
374
585
  }
@@ -27,6 +27,8 @@ import java.io.PushbackInputStream;
27
27
  public class WhisperContext {
28
28
  public static final String NAME = "RNWhisperContext";
29
29
 
30
+ private static String loadedLibrary = "";
31
+
30
32
  private static final int SAMPLE_RATE = 16000;
31
33
  private static final int CHANNEL_CONFIG = AudioFormat.CHANNEL_IN_MONO;
32
34
  private static final int AUDIO_FORMAT = AudioFormat.ENCODING_PCM_16BIT;
@@ -433,67 +435,67 @@ public class WhisperContext {
433
435
 
434
436
  static {
435
437
  Log.d(NAME, "Primary ABI: " + Build.SUPPORTED_ABIS[0]);
436
- boolean loadVfpv4 = false;
437
- boolean loadV8fp16 = false;
438
- if (isArmeabiV7a()) {
439
- // armeabi-v7a needs runtime detection support
440
- String cpuInfo = cpuInfo();
441
- if (cpuInfo != null) {
442
- Log.d(NAME, "CPU info: " + cpuInfo);
443
- if (cpuInfo.contains("vfpv4")) {
444
- Log.d(NAME, "CPU supports vfpv4");
445
- loadVfpv4 = true;
446
- }
447
- }
448
- } else if (isArmeabiV8a()) {
449
- // ARMv8.2a needs runtime detection support
450
- String cpuInfo = cpuInfo();
451
- if (cpuInfo != null) {
452
- Log.d(NAME, "CPU info: " + cpuInfo);
453
- if (cpuInfo.contains("fphp")) {
454
- Log.d(NAME, "CPU supports fp16 arithmetic");
455
- loadV8fp16 = true;
456
- }
457
- }
458
- }
459
438
 
460
- if (loadVfpv4) {
461
- Log.d(NAME, "Loading libwhisper_vfpv4.so");
462
- System.loadLibrary("whisper_vfpv4");
463
- } else if (loadV8fp16) {
464
- Log.d(NAME, "Loading libwhisper_v8fp16_va.so");
465
- System.loadLibrary("whisper_v8fp16_va");
439
+ String cpuFeatures = WhisperContext.getCpuFeatures();
440
+ Log.d(NAME, "CPU features: " + cpuFeatures);
441
+ boolean hasFp16 = cpuFeatures.contains("fp16") || cpuFeatures.contains("fphp");
442
+ Log.d(NAME, "- hasFp16: " + hasFp16);
443
+
444
+ if (WhisperContext.isArm64V8a()) {
445
+ if (hasFp16) {
446
+ Log.d(NAME, "Loading librnwhisper_v8fp16_va_2.so");
447
+ System.loadLibrary("rnwhisper_v8fp16_va_2");
448
+ loadedLibrary = "rnwhisper_v8fp16_va_2";
449
+ }
450
+ } else if (WhisperContext.isArmeabiV7a()) {
451
+ Log.d(NAME, "Loading librnwhisper_vfpv4.so");
452
+ System.loadLibrary("rnwhisper_vfpv4");
453
+ loadedLibrary = "rnwhisper_vfpv4";
454
+ } else if (WhisperContext.isX86_64()) {
455
+ Log.d(NAME, "Loading librnwhisper_x86_64.so");
456
+ System.loadLibrary("rnwhisper_x86_64");
457
+ loadedLibrary = "rnwhisper_x86_64";
466
458
  } else {
467
- Log.d(NAME, "Loading libwhisper.so");
468
- System.loadLibrary("whisper");
459
+ Log.d(NAME, "ARM32 is not supported, skipping loading library");
469
460
  }
470
461
  }
471
462
 
472
- private static boolean isArmeabiV7a() {
463
+ public static boolean isArm64V8a() {
464
+ return Build.SUPPORTED_ABIS[0].equals("arm64-v8a");
465
+ }
466
+
467
+ public static boolean isArmeabiV7a() {
473
468
  return Build.SUPPORTED_ABIS[0].equals("armeabi-v7a");
474
469
  }
475
470
 
476
- private static boolean isArmeabiV8a() {
477
- return Build.SUPPORTED_ABIS[0].equals("arm64-v8a");
471
+ public static boolean isX86_64() {
472
+ return Build.SUPPORTED_ABIS[0].equals("x86_64");
478
473
  }
479
474
 
480
- private static String cpuInfo() {
475
+ public static String getCpuFeatures() {
481
476
  File file = new File("/proc/cpuinfo");
482
477
  StringBuilder stringBuilder = new StringBuilder();
483
478
  try {
484
479
  BufferedReader bufferedReader = new BufferedReader(new FileReader(file));
485
480
  String line;
486
481
  while ((line = bufferedReader.readLine()) != null) {
482
+ if (line.startsWith("Features")) {
487
483
  stringBuilder.append(line);
484
+ break;
485
+ }
488
486
  }
489
487
  bufferedReader.close();
490
488
  return stringBuilder.toString();
491
489
  } catch (IOException e) {
492
490
  Log.w(NAME, "Couldn't read /proc/cpuinfo", e);
493
- return null;
491
+ return "";
494
492
  }
495
493
  }
496
494
 
495
+ public static String getLoadedLibrary() {
496
+ return loadedLibrary;
497
+ }
498
+
497
499
  // JNI methods
498
500
  protected static native long initContext(String modelPath);
499
501
  protected static native long initContextWithAsset(AssetManager assetManager, String modelPath);
@@ -531,4 +533,30 @@ public class WhisperContext {
531
533
  int n_samples
532
534
  );
533
535
  protected static native String bench(long context, int n_threads);
536
+
537
+ // VAD JNI methods
538
+ protected static native long initVadContext(String modelPath);
539
+ protected static native long initVadContextWithAsset(AssetManager assetManager, String modelPath);
540
+ protected static native long initVadContextWithInputStream(PushbackInputStream inputStream);
541
+ protected static native void freeVadContext(long vadContextPtr);
542
+ protected static native boolean vadDetectSpeech(long vadContextPtr, float[] audioData, int nSamples);
543
+ protected static native long vadGetSegmentsFromProbs(long vadContextPtr, float threshold,
544
+ int minSpeechDurationMs, int minSilenceDurationMs,
545
+ float maxSpeechDurationS, int speechPadMs,
546
+ float samplesOverlap);
547
+ protected static native int vadGetNSegments(long segmentsPtr);
548
+ protected static native float vadGetSegmentT0(long segmentsPtr, int index);
549
+ protected static native float vadGetSegmentT1(long segmentsPtr, int index);
550
+ protected static native void vadFreeSegments(long segmentsPtr);
551
+
552
+ // Audio file loading utility for VAD
553
+ public static float[] loadAudioFileAsFloat32(String filePath) {
554
+ try {
555
+ java.io.FileInputStream fis = new java.io.FileInputStream(new java.io.File(filePath));
556
+ return AudioUtils.decodeWaveFile(fis);
557
+ } catch (Exception e) {
558
+ Log.e(NAME, "Failed to load audio file: " + filePath, e);
559
+ return null;
560
+ }
561
+ }
534
562
  }