whisper.rn 0.4.0-rc.9 → 0.4.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (202):
  1. package/README.md +74 -1
  2. package/android/build.gradle +12 -3
  3. package/android/src/main/CMakeLists.txt +43 -13
  4. package/android/src/main/java/com/rnwhisper/RNWhisper.java +211 -0
  5. package/android/src/main/java/com/rnwhisper/WhisperContext.java +64 -36
  6. package/android/src/main/java/com/rnwhisper/WhisperVadContext.java +157 -0
  7. package/android/src/main/jni.cpp +205 -0
  8. package/android/src/main/jniLibs/arm64-v8a/librnwhisper.so +0 -0
  9. package/android/src/main/jniLibs/arm64-v8a/librnwhisper_v8fp16_va_2.so +0 -0
  10. package/android/src/main/jniLibs/armeabi-v7a/librnwhisper.so +0 -0
  11. package/android/src/main/jniLibs/armeabi-v7a/librnwhisper_vfpv4.so +0 -0
  12. package/android/src/main/jniLibs/x86_64/librnwhisper.so +0 -0
  13. package/android/src/main/jniLibs/x86_64/librnwhisper_x86_64.so +0 -0
  14. package/android/src/newarch/java/com/rnwhisper/RNWhisperModule.java +26 -0
  15. package/android/src/oldarch/java/com/rnwhisper/RNWhisperModule.java +26 -0
  16. package/cpp/coreml/whisper-compat.h +10 -0
  17. package/cpp/coreml/whisper-compat.m +35 -0
  18. package/cpp/coreml/whisper-decoder-impl.h +27 -15
  19. package/cpp/coreml/whisper-decoder-impl.m +36 -10
  20. package/cpp/coreml/whisper-encoder-impl.h +21 -9
  21. package/cpp/coreml/whisper-encoder-impl.m +29 -3
  22. package/cpp/ggml-alloc.c +39 -37
  23. package/cpp/ggml-alloc.h +1 -1
  24. package/cpp/ggml-backend-impl.h +55 -27
  25. package/cpp/ggml-backend-reg.cpp +591 -0
  26. package/cpp/ggml-backend.cpp +336 -955
  27. package/cpp/ggml-backend.h +70 -42
  28. package/cpp/ggml-common.h +57 -49
  29. package/cpp/ggml-cpp.h +39 -0
  30. package/cpp/ggml-cpu/amx/amx.cpp +221 -0
  31. package/cpp/ggml-cpu/amx/amx.h +8 -0
  32. package/cpp/ggml-cpu/amx/common.h +91 -0
  33. package/cpp/ggml-cpu/amx/mmq.cpp +2511 -0
  34. package/cpp/ggml-cpu/amx/mmq.h +10 -0
  35. package/cpp/ggml-cpu/arch/arm/cpu-feats.cpp +94 -0
  36. package/cpp/ggml-cpu/arch/arm/quants.c +4113 -0
  37. package/cpp/ggml-cpu/arch/arm/repack.cpp +2162 -0
  38. package/cpp/ggml-cpu/arch/x86/cpu-feats.cpp +327 -0
  39. package/cpp/ggml-cpu/arch/x86/quants.c +4310 -0
  40. package/cpp/ggml-cpu/arch/x86/repack.cpp +3284 -0
  41. package/cpp/ggml-cpu/arch-fallback.h +184 -0
  42. package/cpp/ggml-cpu/binary-ops.cpp +158 -0
  43. package/cpp/ggml-cpu/binary-ops.h +16 -0
  44. package/cpp/ggml-cpu/common.h +72 -0
  45. package/cpp/ggml-cpu/ggml-cpu-impl.h +511 -0
  46. package/cpp/ggml-cpu/ggml-cpu.c +3473 -0
  47. package/cpp/ggml-cpu/ggml-cpu.cpp +671 -0
  48. package/cpp/ggml-cpu/ops.cpp +9085 -0
  49. package/cpp/ggml-cpu/ops.h +111 -0
  50. package/cpp/ggml-cpu/quants.c +1157 -0
  51. package/cpp/ggml-cpu/quants.h +89 -0
  52. package/cpp/ggml-cpu/repack.cpp +1570 -0
  53. package/cpp/ggml-cpu/repack.h +98 -0
  54. package/cpp/ggml-cpu/simd-mappings.h +1006 -0
  55. package/cpp/ggml-cpu/traits.cpp +36 -0
  56. package/cpp/ggml-cpu/traits.h +38 -0
  57. package/cpp/ggml-cpu/unary-ops.cpp +186 -0
  58. package/cpp/ggml-cpu/unary-ops.h +28 -0
  59. package/cpp/ggml-cpu/vec.cpp +321 -0
  60. package/cpp/ggml-cpu/vec.h +973 -0
  61. package/cpp/ggml-cpu.h +143 -0
  62. package/cpp/ggml-impl.h +417 -23
  63. package/cpp/ggml-metal-impl.h +622 -0
  64. package/cpp/ggml-metal.h +9 -9
  65. package/cpp/ggml-metal.m +3451 -1344
  66. package/cpp/ggml-opt.cpp +1037 -0
  67. package/cpp/ggml-opt.h +237 -0
  68. package/cpp/ggml-quants.c +296 -10818
  69. package/cpp/ggml-quants.h +78 -125
  70. package/cpp/ggml-threading.cpp +12 -0
  71. package/cpp/ggml-threading.h +14 -0
  72. package/cpp/ggml-whisper-sim.metallib +0 -0
  73. package/cpp/ggml-whisper.metallib +0 -0
  74. package/cpp/ggml.c +4633 -21450
  75. package/cpp/ggml.h +320 -661
  76. package/cpp/gguf.cpp +1347 -0
  77. package/cpp/gguf.h +202 -0
  78. package/cpp/rn-whisper.cpp +4 -11
  79. package/cpp/whisper-arch.h +197 -0
  80. package/cpp/whisper.cpp +2022 -495
  81. package/cpp/whisper.h +75 -18
  82. package/ios/CMakeLists.txt +95 -0
  83. package/ios/RNWhisper.h +5 -0
  84. package/ios/RNWhisper.mm +147 -0
  85. package/ios/RNWhisperAudioUtils.m +4 -0
  86. package/ios/RNWhisperContext.h +5 -0
  87. package/ios/RNWhisperContext.mm +22 -26
  88. package/ios/RNWhisperVadContext.h +29 -0
  89. package/ios/RNWhisperVadContext.mm +152 -0
  90. package/ios/rnwhisper.xcframework/Info.plist +74 -0
  91. package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml-alloc.h +76 -0
  92. package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml-backend-impl.h +255 -0
  93. package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml-backend.h +354 -0
  94. package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml-common.h +1861 -0
  95. package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml-cpp.h +39 -0
  96. package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml-cpu.h +143 -0
  97. package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml-impl.h +603 -0
  98. package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml-metal-impl.h +622 -0
  99. package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml-metal.h +66 -0
  100. package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml-opt.h +237 -0
  101. package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml-quants.h +100 -0
  102. package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml-threading.h +14 -0
  103. package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml.h +2221 -0
  104. package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/gguf.h +202 -0
  105. package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/rn-audioutils.h +14 -0
  106. package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/rn-whisper-log.h +11 -0
  107. package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/rn-whisper.h +52 -0
  108. package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/whisper-arch.h +197 -0
  109. package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/whisper.h +739 -0
  110. package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Info.plist +0 -0
  111. package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/ggml-whisper.metallib +0 -0
  112. package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/rnwhisper +0 -0
  113. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-alloc.h +76 -0
  114. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-backend-impl.h +255 -0
  115. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-backend.h +354 -0
  116. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-common.h +1861 -0
  117. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-cpp.h +39 -0
  118. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-cpu.h +143 -0
  119. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-impl.h +603 -0
  120. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-metal-impl.h +622 -0
  121. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-metal.h +66 -0
  122. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-opt.h +237 -0
  123. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-quants.h +100 -0
  124. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-threading.h +14 -0
  125. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml.h +2221 -0
  126. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/gguf.h +202 -0
  127. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/rn-audioutils.h +14 -0
  128. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/rn-whisper-log.h +11 -0
  129. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/rn-whisper.h +52 -0
  130. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/whisper-arch.h +197 -0
  131. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/whisper.h +739 -0
  132. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Info.plist +0 -0
  133. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/_CodeSignature/CodeResources +101 -0
  134. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/ggml-whisper-sim.metallib +0 -0
  135. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/rnwhisper +0 -0
  136. package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml-alloc.h +76 -0
  137. package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml-backend-impl.h +255 -0
  138. package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml-backend.h +354 -0
  139. package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml-common.h +1861 -0
  140. package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml-cpp.h +39 -0
  141. package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml-cpu.h +143 -0
  142. package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml-impl.h +603 -0
  143. package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml-metal-impl.h +622 -0
  144. package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml-metal.h +66 -0
  145. package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml-opt.h +237 -0
  146. package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml-quants.h +100 -0
  147. package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml-threading.h +14 -0
  148. package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml.h +2221 -0
  149. package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/gguf.h +202 -0
  150. package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/rn-audioutils.h +14 -0
  151. package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/rn-whisper-log.h +11 -0
  152. package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/rn-whisper.h +52 -0
  153. package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/whisper-arch.h +197 -0
  154. package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/whisper.h +739 -0
  155. package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Info.plist +0 -0
  156. package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/ggml-whisper.metallib +0 -0
  157. package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/rnwhisper +0 -0
  158. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-alloc.h +76 -0
  159. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-backend-impl.h +255 -0
  160. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-backend.h +354 -0
  161. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-common.h +1861 -0
  162. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-cpp.h +39 -0
  163. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-cpu.h +143 -0
  164. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-impl.h +603 -0
  165. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-metal-impl.h +622 -0
  166. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-metal.h +66 -0
  167. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-opt.h +237 -0
  168. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-quants.h +100 -0
  169. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-threading.h +14 -0
  170. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml.h +2221 -0
  171. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/gguf.h +202 -0
  172. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/rn-audioutils.h +14 -0
  173. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/rn-whisper-log.h +11 -0
  174. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/rn-whisper.h +52 -0
  175. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/whisper-arch.h +197 -0
  176. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/whisper.h +739 -0
  177. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Info.plist +0 -0
  178. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/_CodeSignature/CodeResources +101 -0
  179. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/ggml-whisper-sim.metallib +0 -0
  180. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/rnwhisper +0 -0
  181. package/jest/mock.js +24 -0
  182. package/lib/commonjs/NativeRNWhisper.js.map +1 -1
  183. package/lib/commonjs/index.js +111 -1
  184. package/lib/commonjs/index.js.map +1 -1
  185. package/lib/commonjs/version.json +1 -1
  186. package/lib/module/NativeRNWhisper.js.map +1 -1
  187. package/lib/module/index.js +112 -0
  188. package/lib/module/index.js.map +1 -1
  189. package/lib/module/version.json +1 -1
  190. package/lib/typescript/NativeRNWhisper.d.ts +35 -0
  191. package/lib/typescript/NativeRNWhisper.d.ts.map +1 -1
  192. package/lib/typescript/index.d.ts +39 -3
  193. package/lib/typescript/index.d.ts.map +1 -1
  194. package/package.json +10 -6
  195. package/src/NativeRNWhisper.ts +48 -0
  196. package/src/index.ts +132 -1
  197. package/src/version.json +1 -1
  198. package/whisper-rn.podspec +11 -18
  199. package/cpp/README.md +0 -4
  200. package/cpp/ggml-aarch64.c +0 -3209
  201. package/cpp/ggml-aarch64.h +0 -39
  202. package/cpp/ggml-cpu-impl.h +0 -614
@@ -0,0 +1,157 @@
1
+ package com.rnwhisper;
2
+
3
+ import com.facebook.react.bridge.Arguments;
4
+ import com.facebook.react.bridge.WritableArray;
5
+ import com.facebook.react.bridge.WritableMap;
6
+ import com.facebook.react.bridge.ReadableMap;
7
+ import com.facebook.react.bridge.ReactApplicationContext;
8
+
9
+ import android.util.Log;
10
+ import android.content.res.AssetManager;
11
+ import android.util.Base64;
12
+
13
+ import java.io.PushbackInputStream;
14
+
15
+ public class WhisperVadContext {
16
+ public static final String NAME = "RNWhisperVadContext";
17
+
18
+ private int id;
19
+ private ReactApplicationContext reactContext;
20
+ private long vadContext;
21
+
22
+ public WhisperVadContext(int id, ReactApplicationContext reactContext, long vadContext) {
23
+ this.id = id;
24
+ this.vadContext = vadContext;
25
+ this.reactContext = reactContext;
26
+ }
27
+
28
+ public WritableArray detectSpeech(String audioDataBase64, ReadableMap options) throws Exception {
29
+ if (vadContext == 0) {
30
+ throw new Exception("VAD context is null");
31
+ }
32
+
33
+ // Decode base64 audio data to float array
34
+ byte[] audioBytes = Base64.decode(audioDataBase64, Base64.DEFAULT);
35
+ int numSamples = audioBytes.length / 4; // 4 bytes per float
36
+ float[] audioData = new float[numSamples];
37
+
38
+ for (int i = 0; i < numSamples; i++) {
39
+ int intBits = (audioBytes[i * 4] & 0xFF) |
40
+ ((audioBytes[i * 4 + 1] & 0xFF) << 8) |
41
+ ((audioBytes[i * 4 + 2] & 0xFF) << 16) |
42
+ ((audioBytes[i * 4 + 3] & 0xFF) << 24);
43
+ audioData[i] = Float.intBitsToFloat(intBits);
44
+ }
45
+
46
+ return processVadDetection(audioData, numSamples, options);
47
+ }
48
+
49
+ public WritableArray detectSpeechFile(String filePathOrBase64, ReadableMap options) throws Exception {
50
+ if (vadContext == 0) {
51
+ throw new Exception("VAD context is null");
52
+ }
53
+
54
+ // Follow the same pattern as transcribeFile
55
+ String filePath = filePathOrBase64;
56
+
57
+ // Handle HTTP downloads
58
+ if (filePathOrBase64.startsWith("http://") || filePathOrBase64.startsWith("https://")) {
59
+ // Note: This would require access to the downloader, but for now we'll throw an error
60
+ throw new Exception("HTTP URLs not supported in VAD file detection. Please download the file first.");
61
+ }
62
+
63
+ float[] audioData;
64
+
65
+ // Check for resource identifier (bundled assets)
66
+ int resId = getResourceIdentifier(filePath);
67
+ if (resId > 0) {
68
+ audioData = AudioUtils.decodeWaveFile(reactContext.getResources().openRawResource(resId));
69
+ } else if (filePathOrBase64.startsWith("data:audio/wav;base64,")) {
70
+ // Handle base64 WAV data
71
+ audioData = AudioUtils.decodeWaveData(filePathOrBase64);
72
+ } else {
73
+ // Handle regular file path
74
+ audioData = AudioUtils.decodeWaveFile(new java.io.FileInputStream(new java.io.File(filePath)));
75
+ }
76
+
77
+ if (audioData == null) {
78
+ throw new Exception("Failed to load audio file: " + filePathOrBase64);
79
+ }
80
+
81
+ return processVadDetection(audioData, audioData.length, options);
82
+ }
83
+
84
+ public WritableArray detectSpeechWithAudioData(float[] audioData, ReadableMap options) throws Exception {
85
+ if (vadContext == 0) {
86
+ throw new Exception("VAD context is null");
87
+ }
88
+
89
+ return processVadDetection(audioData, audioData.length, options);
90
+ }
91
+
92
+ private int getResourceIdentifier(String filePath) {
93
+ int identifier = reactContext.getResources().getIdentifier(
94
+ filePath,
95
+ "drawable",
96
+ reactContext.getPackageName()
97
+ );
98
+ if (identifier == 0) {
99
+ identifier = reactContext.getResources().getIdentifier(
100
+ filePath,
101
+ "raw",
102
+ reactContext.getPackageName()
103
+ );
104
+ }
105
+ return identifier;
106
+ }
107
+
108
+ private WritableArray processVadDetection(float[] audioData, int numSamples, ReadableMap options) throws Exception {
109
+ // Run VAD detection using WhisperContext static methods
110
+ boolean speechDetected = WhisperContext.vadDetectSpeech(vadContext, audioData, numSamples);
111
+ if (!speechDetected) {
112
+ return Arguments.createArray();
113
+ }
114
+
115
+ // Set VAD parameters from options
116
+ float threshold = options.hasKey("threshold") ? (float) options.getDouble("threshold") : 0.5f;
117
+ int minSpeechDurationMs = options.hasKey("minSpeechDurationMs") ? options.getInt("minSpeechDurationMs") : 250;
118
+ int minSilenceDurationMs = options.hasKey("minSilenceDurationMs") ? options.getInt("minSilenceDurationMs") : 100;
119
+ float maxSpeechDurationS = options.hasKey("maxSpeechDurationS") ? (float) options.getDouble("maxSpeechDurationS") : 30.0f;
120
+ int speechPadMs = options.hasKey("speechPadMs") ? options.getInt("speechPadMs") : 30;
121
+ float samplesOverlap = options.hasKey("samplesOverlap") ? (float) options.getDouble("samplesOverlap") : 0.1f;
122
+
123
+ // Get segments from VAD using WhisperContext static methods
124
+ long segments = WhisperContext.vadGetSegmentsFromProbs(vadContext, threshold, minSpeechDurationMs,
125
+ minSilenceDurationMs, maxSpeechDurationS,
126
+ speechPadMs, samplesOverlap);
127
+ if (segments == 0) {
128
+ return Arguments.createArray();
129
+ }
130
+
131
+ // Convert segments to WritableArray using WhisperContext static methods
132
+ WritableArray result = Arguments.createArray();
133
+ int nSegments = WhisperContext.vadGetNSegments(segments);
134
+
135
+ for (int i = 0; i < nSegments; i++) {
136
+ float t0 = WhisperContext.vadGetSegmentT0(segments, i);
137
+ float t1 = WhisperContext.vadGetSegmentT1(segments, i);
138
+
139
+ WritableMap segment = Arguments.createMap();
140
+ segment.putDouble("t0", t0);
141
+ segment.putDouble("t1", t1);
142
+ result.pushMap(segment);
143
+ }
144
+
145
+ // Clean up using WhisperContext static methods
146
+ WhisperContext.vadFreeSegments(segments);
147
+
148
+ return result;
149
+ }
150
+
151
+ public void release() {
152
+ if (vadContext != 0) {
153
+ WhisperContext.freeVadContext(vadContext);
154
+ vadContext = 0;
155
+ }
156
+ }
157
+ }
@@ -148,6 +148,47 @@ static struct whisper_context *whisper_init_from_asset(
148
148
  return whisper_init_with_params(&loader, cparams);
149
149
  }
150
150
 
151
+ // VAD context initialization functions
152
+ static struct whisper_vad_context *whisper_vad_init_from_input_stream(
153
+ JNIEnv *env,
154
+ jobject input_stream, // PushbackInputStream
155
+ struct whisper_vad_context_params vad_params
156
+ ) {
157
+ input_stream_context *context = new input_stream_context;
158
+ context->env = env;
159
+ context->input_stream = env->NewGlobalRef(input_stream);
160
+
161
+ whisper_model_loader loader = {
162
+ .context = context,
163
+ .read = &input_stream_read,
164
+ .eof = &input_stream_is_eof,
165
+ .close = &input_stream_close
166
+ };
167
+ return whisper_vad_init_with_params(&loader, vad_params);
168
+ }
169
+
170
+ static struct whisper_vad_context *whisper_vad_init_from_asset(
171
+ JNIEnv *env,
172
+ jobject assetManager,
173
+ const char *asset_path,
174
+ struct whisper_vad_context_params vad_params
175
+ ) {
176
+ LOGI("Loading VAD model from asset '%s'\n", asset_path);
177
+ AAssetManager *asset_manager = AAssetManager_fromJava(env, assetManager);
178
+ AAsset *asset = AAssetManager_open(asset_manager, asset_path, AASSET_MODE_STREAMING);
179
+ if (!asset) {
180
+ LOGW("Failed to open VAD asset '%s'\n", asset_path);
181
+ return NULL;
182
+ }
183
+ whisper_model_loader loader = {
184
+ .context = asset,
185
+ .read = &asset_read,
186
+ .eof = &asset_is_eof,
187
+ .close = &asset_close
188
+ };
189
+ return whisper_vad_init_with_params(&loader, vad_params);
190
+ }
191
+
151
192
  extern "C" {
152
193
 
153
194
  JNIEXPORT jlong JNICALL
@@ -155,7 +196,10 @@ Java_com_rnwhisper_WhisperContext_initContext(
155
196
  JNIEnv *env, jobject thiz, jstring model_path_str) {
156
197
  UNUSED(thiz);
157
198
  struct whisper_context_params cparams;
199
+
200
+ // TODO: Expose dtw_token_timestamps and dtw_aheads_preset
158
201
  cparams.dtw_token_timestamps = false;
202
+ // cparams.dtw_aheads_preset = WHISPER_AHEADS_BASE;
159
203
 
160
204
  struct whisper_context *context = nullptr;
161
205
  const char *model_path_chars = env->GetStringUTFChars(model_path_str, nullptr);
@@ -173,7 +217,10 @@ Java_com_rnwhisper_WhisperContext_initContextWithAsset(
173
217
  ) {
174
218
  UNUSED(thiz);
175
219
  struct whisper_context_params cparams;
220
+
221
+ // TODO: Expose dtw_token_timestamps and dtw_aheads_preset
176
222
  cparams.dtw_token_timestamps = false;
223
+ // cparams.dtw_aheads_preset = WHISPER_AHEADS_BASE;
177
224
 
178
225
  struct whisper_context *context = nullptr;
179
226
  const char *model_path_chars = env->GetStringUTFChars(model_path_str, nullptr);
@@ -190,7 +237,10 @@ Java_com_rnwhisper_WhisperContext_initContextWithInputStream(
190
237
  ) {
191
238
  UNUSED(thiz);
192
239
  struct whisper_context_params cparams;
240
+
241
+ // TODO: Expose dtw_token_timestamps and dtw_aheads_preset
193
242
  cparams.dtw_token_timestamps = false;
243
+ // cparams.dtw_aheads_preset = WHISPER_AHEADS_BASE;
194
244
 
195
245
  struct whisper_context *context = nullptr;
196
246
  context = whisper_init_from_input_stream(env, input_stream, cparams);
@@ -521,4 +571,159 @@ Java_com_rnwhisper_WhisperContext_bench(
521
571
  return env->NewStringUTF(result.c_str());
522
572
  }
523
573
 
574
+ // VAD Context JNI implementations
575
+ JNIEXPORT jlong JNICALL
576
+ Java_com_rnwhisper_WhisperContext_initVadContext(
577
+ JNIEnv *env,
578
+ jobject thiz,
579
+ jstring model_path_str
580
+ ) {
581
+ UNUSED(thiz);
582
+ struct whisper_vad_context_params vad_params = whisper_vad_default_context_params();
583
+
584
+ struct whisper_vad_context *vad_context = nullptr;
585
+ const char *model_path_chars = env->GetStringUTFChars(model_path_str, nullptr);
586
+ vad_context = whisper_vad_init_from_file_with_params(model_path_chars, vad_params);
587
+ env->ReleaseStringUTFChars(model_path_str, model_path_chars);
588
+ return reinterpret_cast<jlong>(vad_context);
589
+ }
590
+
591
+ JNIEXPORT jlong JNICALL
592
+ Java_com_rnwhisper_WhisperContext_initVadContextWithAsset(
593
+ JNIEnv *env,
594
+ jobject thiz,
595
+ jobject asset_manager,
596
+ jstring model_path_str
597
+ ) {
598
+ UNUSED(thiz);
599
+ struct whisper_vad_context_params vad_params = whisper_vad_default_context_params();
600
+
601
+ struct whisper_vad_context *vad_context = nullptr;
602
+ const char *model_path_chars = env->GetStringUTFChars(model_path_str, nullptr);
603
+ vad_context = whisper_vad_init_from_asset(env, asset_manager, model_path_chars, vad_params);
604
+ env->ReleaseStringUTFChars(model_path_str, model_path_chars);
605
+ return reinterpret_cast<jlong>(vad_context);
606
+ }
607
+
608
+ JNIEXPORT jlong JNICALL
609
+ Java_com_rnwhisper_WhisperContext_initVadContextWithInputStream(
610
+ JNIEnv *env,
611
+ jobject thiz,
612
+ jobject input_stream
613
+ ) {
614
+ UNUSED(thiz);
615
+ struct whisper_vad_context_params vad_params = whisper_vad_default_context_params();
616
+
617
+ struct whisper_vad_context *vad_context = nullptr;
618
+ vad_context = whisper_vad_init_from_input_stream(env, input_stream, vad_params);
619
+ return reinterpret_cast<jlong>(vad_context);
620
+ }
621
+
622
+ JNIEXPORT void JNICALL
623
+ Java_com_rnwhisper_WhisperContext_freeVadContext(
624
+ JNIEnv *env,
625
+ jobject thiz,
626
+ jlong vad_context_ptr
627
+ ) {
628
+ UNUSED(env);
629
+ UNUSED(thiz);
630
+ struct whisper_vad_context *vad_context = reinterpret_cast<struct whisper_vad_context *>(vad_context_ptr);
631
+ whisper_vad_free(vad_context);
632
+ }
633
+
634
+ JNIEXPORT jboolean JNICALL
635
+ Java_com_rnwhisper_WhisperContext_vadDetectSpeech(
636
+ JNIEnv *env,
637
+ jobject thiz,
638
+ jlong vad_context_ptr,
639
+ jfloatArray audio_data,
640
+ jint n_samples
641
+ ) {
642
+ UNUSED(thiz);
643
+ struct whisper_vad_context *vad_context = reinterpret_cast<struct whisper_vad_context *>(vad_context_ptr);
644
+
645
+ jfloat *audio_data_arr = env->GetFloatArrayElements(audio_data, nullptr);
646
+ bool result = whisper_vad_detect_speech(vad_context, audio_data_arr, n_samples);
647
+ env->ReleaseFloatArrayElements(audio_data, audio_data_arr, JNI_ABORT);
648
+
649
+ return result;
650
+ }
651
+
652
+ JNIEXPORT jlong JNICALL
653
+ Java_com_rnwhisper_WhisperContext_vadGetSegmentsFromProbs(
654
+ JNIEnv *env,
655
+ jobject thiz,
656
+ jlong vad_context_ptr,
657
+ jfloat threshold,
658
+ jint min_speech_duration_ms,
659
+ jint min_silence_duration_ms,
660
+ jfloat max_speech_duration_s,
661
+ jint speech_pad_ms,
662
+ jfloat samples_overlap
663
+ ) {
664
+ UNUSED(thiz);
665
+ struct whisper_vad_context *vad_context = reinterpret_cast<struct whisper_vad_context *>(vad_context_ptr);
666
+
667
+ struct whisper_vad_params vad_params = whisper_vad_default_params();
668
+ vad_params.threshold = threshold;
669
+ vad_params.min_speech_duration_ms = min_speech_duration_ms;
670
+ vad_params.min_silence_duration_ms = min_silence_duration_ms;
671
+ vad_params.max_speech_duration_s = max_speech_duration_s;
672
+ vad_params.speech_pad_ms = speech_pad_ms;
673
+ vad_params.samples_overlap = samples_overlap;
674
+
675
+ struct whisper_vad_segments *segments = whisper_vad_segments_from_probs(vad_context, vad_params);
676
+ return reinterpret_cast<jlong>(segments);
677
+ }
678
+
679
+ JNIEXPORT jint JNICALL
680
+ Java_com_rnwhisper_WhisperContext_vadGetNSegments(
681
+ JNIEnv *env,
682
+ jobject thiz,
683
+ jlong segments_ptr
684
+ ) {
685
+ UNUSED(env);
686
+ UNUSED(thiz);
687
+ struct whisper_vad_segments *segments = reinterpret_cast<struct whisper_vad_segments *>(segments_ptr);
688
+ return whisper_vad_segments_n_segments(segments);
689
+ }
690
+
691
+ JNIEXPORT jfloat JNICALL
692
+ Java_com_rnwhisper_WhisperContext_vadGetSegmentT0(
693
+ JNIEnv *env,
694
+ jobject thiz,
695
+ jlong segments_ptr,
696
+ jint index
697
+ ) {
698
+ UNUSED(env);
699
+ UNUSED(thiz);
700
+ struct whisper_vad_segments *segments = reinterpret_cast<struct whisper_vad_segments *>(segments_ptr);
701
+ return whisper_vad_segments_get_segment_t0(segments, index);
702
+ }
703
+
704
+ JNIEXPORT jfloat JNICALL
705
+ Java_com_rnwhisper_WhisperContext_vadGetSegmentT1(
706
+ JNIEnv *env,
707
+ jobject thiz,
708
+ jlong segments_ptr,
709
+ jint index
710
+ ) {
711
+ UNUSED(env);
712
+ UNUSED(thiz);
713
+ struct whisper_vad_segments *segments = reinterpret_cast<struct whisper_vad_segments *>(segments_ptr);
714
+ return whisper_vad_segments_get_segment_t1(segments, index);
715
+ }
716
+
717
+ JNIEXPORT void JNICALL
718
+ Java_com_rnwhisper_WhisperContext_vadFreeSegments(
719
+ JNIEnv *env,
720
+ jobject thiz,
721
+ jlong segments_ptr
722
+ ) {
723
+ UNUSED(env);
724
+ UNUSED(thiz);
725
+ struct whisper_vad_segments *segments = reinterpret_cast<struct whisper_vad_segments *>(segments_ptr);
726
+ whisper_vad_free_segments(segments);
727
+ }
728
+
524
729
  } // extern "C"
@@ -77,6 +77,32 @@ public class RNWhisperModule extends NativeRNWhisperSpec {
77
77
  rnwhisper.releaseAllContexts(promise);
78
78
  }
79
79
 
80
+ // VAD methods
81
+ @ReactMethod
82
+ public void initVadContext(final ReadableMap options, final Promise promise) {
83
+ rnwhisper.initVadContext(options, promise);
84
+ }
85
+
86
+ @ReactMethod
87
+ public void vadDetectSpeech(double id, String audioDataBase64, ReadableMap options, Promise promise) {
88
+ rnwhisper.vadDetectSpeech(id, audioDataBase64, options, promise);
89
+ }
90
+
91
+ @ReactMethod
92
+ public void vadDetectSpeechFile(double id, String filePath, ReadableMap options, Promise promise) {
93
+ rnwhisper.vadDetectSpeechFile(id, filePath, options, promise);
94
+ }
95
+
96
+ @ReactMethod
97
+ public void releaseVadContext(double id, Promise promise) {
98
+ rnwhisper.releaseVadContext(id, promise);
99
+ }
100
+
101
+ @ReactMethod
102
+ public void releaseAllVadContexts(Promise promise) {
103
+ rnwhisper.releaseAllVadContexts(promise);
104
+ }
105
+
80
106
  /*
81
107
  * iOS Specific methods, left here for make the turbo module happy:
82
108
  */
@@ -76,4 +76,30 @@ public class RNWhisperModule extends ReactContextBaseJavaModule {
76
76
  public void releaseAllContexts(Promise promise) {
77
77
  rnwhisper.releaseAllContexts(promise);
78
78
  }
79
+
80
+ // VAD methods
81
+ @ReactMethod
82
+ public void initVadContext(final ReadableMap options, final Promise promise) {
83
+ rnwhisper.initVadContext(options, promise);
84
+ }
85
+
86
+ @ReactMethod
87
+ public void vadDetectSpeech(double id, String audioDataBase64, ReadableMap options, Promise promise) {
88
+ rnwhisper.vadDetectSpeech(id, audioDataBase64, options, promise);
89
+ }
90
+
91
+ @ReactMethod
92
+ public void vadDetectSpeechFile(double id, String filePath, ReadableMap options, Promise promise) {
93
+ rnwhisper.vadDetectSpeechFile(id, filePath, options, promise);
94
+ }
95
+
96
+ @ReactMethod
97
+ public void releaseVadContext(double id, Promise promise) {
98
+ rnwhisper.releaseVadContext(id, promise);
99
+ }
100
+
101
+ @ReactMethod
102
+ public void releaseAllVadContexts(Promise promise) {
103
+ rnwhisper.releaseAllVadContexts(promise);
104
+ }
79
105
  }
@@ -0,0 +1,10 @@
1
+ #import <CoreML/CoreML.h>
2
+
3
/// Completion-handler flavoured prediction methods declared on MLModel.
/// The accompanying implementation (whisper-compat.m) runs the synchronous
/// prediction on a freshly created operation queue and invokes the handler
/// on the main queue.
@interface MLModel (Compat)

/// Runs a prediction for `input` and reports the outcome through
/// `completionHandler`.
/// @param input Feature provider to predict from.
/// @param completionHandler Block receiving the prediction output and the
///        error produced by the underlying synchronous call.
- (void)predictionFromFeatures:(id<MLFeatureProvider>)input
             completionHandler:(void (^)(id<MLFeatureProvider> output,
                                         NSError *error))completionHandler;

/// Same as above, additionally forwarding `options` to the underlying
/// synchronous prediction call.
/// @param input Feature provider to predict from.
/// @param options Prediction options passed through unchanged.
/// @param completionHandler Block receiving the prediction output and the
///        error produced by the underlying synchronous call.
- (void)predictionFromFeatures:(id<MLFeatureProvider>)input
                       options:(MLPredictionOptions *)options
             completionHandler:(void (^)(id<MLFeatureProvider> output,
                                         NSError *error))completionHandler;

@end
@@ -0,0 +1,35 @@
1
+ #import "whisper-compat.h"
2
+ #import <Foundation/Foundation.h>
3
+
4
/// Category supplying completion-handler based prediction methods on MLModel.
/// The method bodies are compiled only when MAC_OS_X_VERSION_14_00 is not
/// defined or the maximum allowed SDK version is below it (see the #if below).
@implementation MLModel (Compat)

#if !defined(MAC_OS_X_VERSION_14_00) || MAC_OS_X_VERSION_MAX_ALLOWED < MAC_OS_X_VERSION_14_00

/// Runs -predictionFromFeatures:error: on a newly created operation queue and
/// then delivers the result to `completionHandler` on the main queue.
/// @param input Feature provider to predict from.
/// @param completionHandler Receives the prediction and the error written by
///        the synchronous call; skipped when nil.
- (void) predictionFromFeatures:(id<MLFeatureProvider>) input
              completionHandler:(void (^)(id<MLFeatureProvider> output, NSError * error)) completionHandler {
    NSOperationQueue *workQueue = [[NSOperationQueue alloc] init];
    [workQueue addOperationWithBlock:^{
        NSError *error = nil;
        id<MLFeatureProvider> prediction = [self predictionFromFeatures:input error:&error];

        [[NSOperationQueue mainQueue] addOperationWithBlock:^{
            // Invoking a nil block crashes, so tolerate callers that pass none.
            if (completionHandler) {
                completionHandler(prediction, error);
            }
        }];
    }];
}

/// Runs -predictionFromFeatures:options:error: on a newly created operation
/// queue and then delivers the result to `completionHandler` on the main queue.
/// @param input Feature provider to predict from.
/// @param options Prediction options forwarded unchanged.
/// @param completionHandler Receives the prediction and the error written by
///        the synchronous call; skipped when nil.
- (void) predictionFromFeatures:(id<MLFeatureProvider>) input
                        options:(MLPredictionOptions *) options
              completionHandler:(void (^)(id<MLFeatureProvider> output, NSError * error)) completionHandler {
    NSOperationQueue *workQueue = [[NSOperationQueue alloc] init];
    [workQueue addOperationWithBlock:^{
        NSError *error = nil;
        id<MLFeatureProvider> prediction = [self predictionFromFeatures:input options:options error:&error];

        [[NSOperationQueue mainQueue] addOperationWithBlock:^{
            // Invoking a nil block crashes, so tolerate callers that pass none.
            if (completionHandler) {
                completionHandler(prediction, error);
            }
        }];
    }];
}

#endif

@end
@@ -11,36 +11,33 @@
11
11
 
12
12
  NS_ASSUME_NONNULL_BEGIN
13
13
 
14
-
15
14
  /// Model Prediction Input Type
16
- API_AVAILABLE(macos(12.0), ios(15.0), watchos(8.0), tvos(15.0)) __attribute__((visibility("hidden")))
15
+ API_AVAILABLE(macos(10.15), ios(13.0), watchos(6.0), tvos(13.0)) __attribute__((visibility("hidden")))
17
16
  @interface whisper_decoder_implInput : NSObject<MLFeatureProvider>
18
17
 
19
- /// token_data as 1 by 1 matrix of 32-bit integers
18
+ /// token_data as 1 by 1 matrix of floats
20
19
  @property (readwrite, nonatomic, strong) MLMultiArray * token_data;
21
20
 
22
- /// audio_data as 1 × 384 × 1 × 1500 4-dimensional array of floats
21
+ /// audio_data as 1 × 1500 × 384 3-dimensional array of floats
23
22
  @property (readwrite, nonatomic, strong) MLMultiArray * audio_data;
24
23
  - (instancetype)init NS_UNAVAILABLE;
25
24
  - (instancetype)initWithToken_data:(MLMultiArray *)token_data audio_data:(MLMultiArray *)audio_data NS_DESIGNATED_INITIALIZER;
26
25
 
27
26
  @end
28
27
 
29
-
30
28
  /// Model Prediction Output Type
31
- API_AVAILABLE(macos(12.0), ios(15.0), watchos(8.0), tvos(15.0)) __attribute__((visibility("hidden")))
29
+ API_AVAILABLE(macos(10.15), ios(13.0), watchos(6.0), tvos(13.0)) __attribute__((visibility("hidden")))
32
30
  @interface whisper_decoder_implOutput : NSObject<MLFeatureProvider>
33
31
 
34
- /// var_1346 as multidimensional array of floats
35
- @property (readwrite, nonatomic, strong) MLMultiArray * var_1346;
32
+ /// cast_76 as multidimensional array of floats
33
+ @property (readwrite, nonatomic, strong) MLMultiArray * cast_76;
36
34
  - (instancetype)init NS_UNAVAILABLE;
37
- - (instancetype)initWithVar_1346:(MLMultiArray *)var_1346 NS_DESIGNATED_INITIALIZER;
35
+ - (instancetype)initWithCast_76:(MLMultiArray *)cast_76 NS_DESIGNATED_INITIALIZER;
38
36
 
39
37
  @end
40
38
 
41
-
42
39
  /// Class for model loading and prediction
43
- API_AVAILABLE(macos(12.0), ios(15.0), watchos(8.0), tvos(15.0)) __attribute__((visibility("hidden")))
40
+ API_AVAILABLE(macos(10.15), ios(13.0), watchos(6.0), tvos(13.0)) __attribute__((visibility("hidden")))
44
41
  @interface whisper_decoder_impl : NSObject
45
42
  @property (readonly, nonatomic, nullable) MLModel * model;
46
43
 
@@ -94,7 +91,7 @@ API_AVAILABLE(macos(12.0), ios(15.0), watchos(8.0), tvos(15.0)) __attribute__((v
94
91
  @param configuration The model configuration
95
92
  @param handler When the model load completes successfully or unsuccessfully, the completion handler is invoked with a valid whisper_decoder_impl instance or NSError object.
96
93
  */
97
- + (void)loadWithConfiguration:(MLModelConfiguration *)configuration completionHandler:(void (^)(whisper_decoder_impl * _Nullable model, NSError * _Nullable error))handler;
94
+ + (void)loadWithConfiguration:(MLModelConfiguration *)configuration completionHandler:(void (^)(whisper_decoder_impl * _Nullable model, NSError * _Nullable error))handler API_AVAILABLE(macos(11.0), ios(14.0), watchos(7.0), tvos(14.0)) __attribute__((visibility("hidden")));
98
95
 
99
96
  /**
100
97
  Construct whisper_decoder_impl instance asynchronously with URL of .mlmodelc directory and optional configuration.
@@ -105,7 +102,7 @@ API_AVAILABLE(macos(12.0), ios(15.0), watchos(8.0), tvos(15.0)) __attribute__((v
105
102
  @param configuration The model configuration
106
103
  @param handler When the model load completes successfully or unsuccessfully, the completion handler is invoked with a valid whisper_decoder_impl instance or NSError object.
107
104
  */
108
- + (void)loadContentsOfURL:(NSURL *)modelURL configuration:(MLModelConfiguration *)configuration completionHandler:(void (^)(whisper_decoder_impl * _Nullable model, NSError * _Nullable error))handler;
105
+ + (void)loadContentsOfURL:(NSURL *)modelURL configuration:(MLModelConfiguration *)configuration completionHandler:(void (^)(whisper_decoder_impl * _Nullable model, NSError * _Nullable error))handler API_AVAILABLE(macos(11.0), ios(14.0), watchos(7.0), tvos(14.0)) __attribute__((visibility("hidden")));
109
106
 
110
107
  /**
111
108
  Make a prediction using the standard interface
@@ -124,10 +121,25 @@ API_AVAILABLE(macos(12.0), ios(15.0), watchos(8.0), tvos(15.0)) __attribute__((v
124
121
  */
125
122
  - (nullable whisper_decoder_implOutput *)predictionFromFeatures:(whisper_decoder_implInput *)input options:(MLPredictionOptions *)options error:(NSError * _Nullable __autoreleasing * _Nullable)error;
126
123
 
124
+ /**
125
+ Make an asynchronous prediction using the standard interface
126
+ @param input an instance of whisper_decoder_implInput to predict from
127
+ @param completionHandler a block that will be called upon completion of the prediction. error will be nil if no error occurred.
128
+ */
129
+ - (void)predictionFromFeatures:(whisper_decoder_implInput *)input completionHandler:(void (^)(whisper_decoder_implOutput * _Nullable output, NSError * _Nullable error))completionHandler API_AVAILABLE(macos(14.0), ios(17.0), watchos(10.0), tvos(17.0)) __attribute__((visibility("hidden")));
130
+
131
+ /**
132
+ Make an asynchronous prediction using the standard interface
133
+ @param input an instance of whisper_decoder_implInput to predict from
134
+ @param options prediction options
135
+ @param completionHandler a block that will be called upon completion of the prediction. error will be nil if no error occurred.
136
+ */
137
+ - (void)predictionFromFeatures:(whisper_decoder_implInput *)input options:(MLPredictionOptions *)options completionHandler:(void (^)(whisper_decoder_implOutput * _Nullable output, NSError * _Nullable error))completionHandler API_AVAILABLE(macos(14.0), ios(17.0), watchos(10.0), tvos(17.0)) __attribute__((visibility("hidden")));
138
+
127
139
  /**
128
140
  Make a prediction using the convenience interface
129
- @param token_data as 1 by 1 matrix of 32-bit integers:
130
- @param audio_data as 1 × 384 × 1 × 1500 4-dimensional array of floats:
141
+ @param token_data 1 by 1 matrix of floats
142
+ @param audio_data 1 × 1500 × 384 3-dimensional array of floats
131
143
  @param error If an error occurs, upon return contains an NSError object that describes the problem. If you are not interested in possible errors, pass in NULL.
132
144
  @return the prediction as whisper_decoder_implOutput
133
145
  */