whisper.rn 0.4.0-rc.9 → 0.4.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +74 -1
- package/android/build.gradle +12 -3
- package/android/src/main/CMakeLists.txt +43 -13
- package/android/src/main/java/com/rnwhisper/RNWhisper.java +211 -0
- package/android/src/main/java/com/rnwhisper/WhisperContext.java +64 -36
- package/android/src/main/java/com/rnwhisper/WhisperVadContext.java +157 -0
- package/android/src/main/jni.cpp +205 -0
- package/android/src/main/jniLibs/arm64-v8a/librnwhisper.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/librnwhisper_v8fp16_va_2.so +0 -0
- package/android/src/main/jniLibs/armeabi-v7a/librnwhisper.so +0 -0
- package/android/src/main/jniLibs/armeabi-v7a/librnwhisper_vfpv4.so +0 -0
- package/android/src/main/jniLibs/x86_64/librnwhisper.so +0 -0
- package/android/src/main/jniLibs/x86_64/librnwhisper_x86_64.so +0 -0
- package/android/src/newarch/java/com/rnwhisper/RNWhisperModule.java +26 -0
- package/android/src/oldarch/java/com/rnwhisper/RNWhisperModule.java +26 -0
- package/cpp/coreml/whisper-compat.h +10 -0
- package/cpp/coreml/whisper-compat.m +35 -0
- package/cpp/coreml/whisper-decoder-impl.h +27 -15
- package/cpp/coreml/whisper-decoder-impl.m +36 -10
- package/cpp/coreml/whisper-encoder-impl.h +21 -9
- package/cpp/coreml/whisper-encoder-impl.m +29 -3
- package/cpp/ggml-alloc.c +39 -37
- package/cpp/ggml-alloc.h +1 -1
- package/cpp/ggml-backend-impl.h +55 -27
- package/cpp/ggml-backend-reg.cpp +591 -0
- package/cpp/ggml-backend.cpp +336 -955
- package/cpp/ggml-backend.h +70 -42
- package/cpp/ggml-common.h +57 -49
- package/cpp/ggml-cpp.h +39 -0
- package/cpp/ggml-cpu/amx/amx.cpp +221 -0
- package/cpp/ggml-cpu/amx/amx.h +8 -0
- package/cpp/ggml-cpu/amx/common.h +91 -0
- package/cpp/ggml-cpu/amx/mmq.cpp +2511 -0
- package/cpp/ggml-cpu/amx/mmq.h +10 -0
- package/cpp/ggml-cpu/arch/arm/cpu-feats.cpp +94 -0
- package/cpp/ggml-cpu/arch/arm/quants.c +4113 -0
- package/cpp/ggml-cpu/arch/arm/repack.cpp +2162 -0
- package/cpp/ggml-cpu/arch/x86/cpu-feats.cpp +327 -0
- package/cpp/ggml-cpu/arch/x86/quants.c +4310 -0
- package/cpp/ggml-cpu/arch/x86/repack.cpp +3284 -0
- package/cpp/ggml-cpu/arch-fallback.h +184 -0
- package/cpp/ggml-cpu/binary-ops.cpp +158 -0
- package/cpp/ggml-cpu/binary-ops.h +16 -0
- package/cpp/ggml-cpu/common.h +72 -0
- package/cpp/ggml-cpu/ggml-cpu-impl.h +511 -0
- package/cpp/ggml-cpu/ggml-cpu.c +3473 -0
- package/cpp/ggml-cpu/ggml-cpu.cpp +671 -0
- package/cpp/ggml-cpu/ops.cpp +9085 -0
- package/cpp/ggml-cpu/ops.h +111 -0
- package/cpp/ggml-cpu/quants.c +1157 -0
- package/cpp/ggml-cpu/quants.h +89 -0
- package/cpp/ggml-cpu/repack.cpp +1570 -0
- package/cpp/ggml-cpu/repack.h +98 -0
- package/cpp/ggml-cpu/simd-mappings.h +1006 -0
- package/cpp/ggml-cpu/traits.cpp +36 -0
- package/cpp/ggml-cpu/traits.h +38 -0
- package/cpp/ggml-cpu/unary-ops.cpp +186 -0
- package/cpp/ggml-cpu/unary-ops.h +28 -0
- package/cpp/ggml-cpu/vec.cpp +321 -0
- package/cpp/ggml-cpu/vec.h +973 -0
- package/cpp/ggml-cpu.h +143 -0
- package/cpp/ggml-impl.h +417 -23
- package/cpp/ggml-metal-impl.h +622 -0
- package/cpp/ggml-metal.h +9 -9
- package/cpp/ggml-metal.m +3451 -1344
- package/cpp/ggml-opt.cpp +1037 -0
- package/cpp/ggml-opt.h +237 -0
- package/cpp/ggml-quants.c +296 -10818
- package/cpp/ggml-quants.h +78 -125
- package/cpp/ggml-threading.cpp +12 -0
- package/cpp/ggml-threading.h +14 -0
- package/cpp/ggml-whisper-sim.metallib +0 -0
- package/cpp/ggml-whisper.metallib +0 -0
- package/cpp/ggml.c +4633 -21450
- package/cpp/ggml.h +320 -661
- package/cpp/gguf.cpp +1347 -0
- package/cpp/gguf.h +202 -0
- package/cpp/rn-whisper.cpp +4 -11
- package/cpp/whisper-arch.h +197 -0
- package/cpp/whisper.cpp +2022 -495
- package/cpp/whisper.h +75 -18
- package/ios/CMakeLists.txt +95 -0
- package/ios/RNWhisper.h +5 -0
- package/ios/RNWhisper.mm +147 -0
- package/ios/RNWhisperAudioUtils.m +4 -0
- package/ios/RNWhisperContext.h +5 -0
- package/ios/RNWhisperContext.mm +22 -26
- package/ios/RNWhisperVadContext.h +29 -0
- package/ios/RNWhisperVadContext.mm +152 -0
- package/ios/rnwhisper.xcframework/Info.plist +74 -0
- package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml-alloc.h +76 -0
- package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml-backend-impl.h +255 -0
- package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml-backend.h +354 -0
- package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml-common.h +1861 -0
- package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml-cpp.h +39 -0
- package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml-cpu.h +143 -0
- package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml-impl.h +603 -0
- package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml-metal-impl.h +622 -0
- package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml-metal.h +66 -0
- package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml-opt.h +237 -0
- package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml-quants.h +100 -0
- package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml-threading.h +14 -0
- package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml.h +2221 -0
- package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/gguf.h +202 -0
- package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/rn-audioutils.h +14 -0
- package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/rn-whisper-log.h +11 -0
- package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/rn-whisper.h +52 -0
- package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/whisper-arch.h +197 -0
- package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/whisper.h +739 -0
- package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Info.plist +0 -0
- package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/ggml-whisper.metallib +0 -0
- package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/rnwhisper +0 -0
- package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-alloc.h +76 -0
- package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-backend-impl.h +255 -0
- package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-backend.h +354 -0
- package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-common.h +1861 -0
- package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-cpp.h +39 -0
- package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-cpu.h +143 -0
- package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-impl.h +603 -0
- package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-metal-impl.h +622 -0
- package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-metal.h +66 -0
- package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-opt.h +237 -0
- package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-quants.h +100 -0
- package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-threading.h +14 -0
- package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml.h +2221 -0
- package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/gguf.h +202 -0
- package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/rn-audioutils.h +14 -0
- package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/rn-whisper-log.h +11 -0
- package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/rn-whisper.h +52 -0
- package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/whisper-arch.h +197 -0
- package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/whisper.h +739 -0
- package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Info.plist +0 -0
- package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/_CodeSignature/CodeResources +101 -0
- package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/ggml-whisper-sim.metallib +0 -0
- package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/rnwhisper +0 -0
- package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml-alloc.h +76 -0
- package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml-backend-impl.h +255 -0
- package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml-backend.h +354 -0
- package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml-common.h +1861 -0
- package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml-cpp.h +39 -0
- package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml-cpu.h +143 -0
- package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml-impl.h +603 -0
- package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml-metal-impl.h +622 -0
- package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml-metal.h +66 -0
- package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml-opt.h +237 -0
- package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml-quants.h +100 -0
- package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml-threading.h +14 -0
- package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml.h +2221 -0
- package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/gguf.h +202 -0
- package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/rn-audioutils.h +14 -0
- package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/rn-whisper-log.h +11 -0
- package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/rn-whisper.h +52 -0
- package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/whisper-arch.h +197 -0
- package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/whisper.h +739 -0
- package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Info.plist +0 -0
- package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/ggml-whisper.metallib +0 -0
- package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/rnwhisper +0 -0
- package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-alloc.h +76 -0
- package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-backend-impl.h +255 -0
- package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-backend.h +354 -0
- package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-common.h +1861 -0
- package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-cpp.h +39 -0
- package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-cpu.h +143 -0
- package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-impl.h +603 -0
- package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-metal-impl.h +622 -0
- package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-metal.h +66 -0
- package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-opt.h +237 -0
- package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-quants.h +100 -0
- package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-threading.h +14 -0
- package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml.h +2221 -0
- package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/gguf.h +202 -0
- package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/rn-audioutils.h +14 -0
- package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/rn-whisper-log.h +11 -0
- package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/rn-whisper.h +52 -0
- package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/whisper-arch.h +197 -0
- package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/whisper.h +739 -0
- package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Info.plist +0 -0
- package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/_CodeSignature/CodeResources +101 -0
- package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/ggml-whisper-sim.metallib +0 -0
- package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/rnwhisper +0 -0
- package/jest/mock.js +24 -0
- package/lib/commonjs/NativeRNWhisper.js.map +1 -1
- package/lib/commonjs/index.js +111 -1
- package/lib/commonjs/index.js.map +1 -1
- package/lib/commonjs/version.json +1 -1
- package/lib/module/NativeRNWhisper.js.map +1 -1
- package/lib/module/index.js +112 -0
- package/lib/module/index.js.map +1 -1
- package/lib/module/version.json +1 -1
- package/lib/typescript/NativeRNWhisper.d.ts +35 -0
- package/lib/typescript/NativeRNWhisper.d.ts.map +1 -1
- package/lib/typescript/index.d.ts +39 -3
- package/lib/typescript/index.d.ts.map +1 -1
- package/package.json +10 -6
- package/src/NativeRNWhisper.ts +48 -0
- package/src/index.ts +132 -1
- package/src/version.json +1 -1
- package/whisper-rn.podspec +11 -18
- package/cpp/README.md +0 -4
- package/cpp/ggml-aarch64.c +0 -3209
- package/cpp/ggml-aarch64.h +0 -39
- package/cpp/ggml-cpu-impl.h +0 -614
|
@@ -0,0 +1,157 @@
|
|
|
1
|
+
package com.rnwhisper;
|
|
2
|
+
|
|
3
|
+
import com.facebook.react.bridge.Arguments;
|
|
4
|
+
import com.facebook.react.bridge.WritableArray;
|
|
5
|
+
import com.facebook.react.bridge.WritableMap;
|
|
6
|
+
import com.facebook.react.bridge.ReadableMap;
|
|
7
|
+
import com.facebook.react.bridge.ReactApplicationContext;
|
|
8
|
+
|
|
9
|
+
import android.util.Log;
|
|
10
|
+
import android.content.res.AssetManager;
|
|
11
|
+
import android.util.Base64;
|
|
12
|
+
|
|
13
|
+
import java.io.PushbackInputStream;
|
|
14
|
+
|
|
15
|
+
public class WhisperVadContext {
|
|
16
|
+
public static final String NAME = "RNWhisperVadContext";
|
|
17
|
+
|
|
18
|
+
private int id;
|
|
19
|
+
private ReactApplicationContext reactContext;
|
|
20
|
+
private long vadContext;
|
|
21
|
+
|
|
22
|
+
public WhisperVadContext(int id, ReactApplicationContext reactContext, long vadContext) {
|
|
23
|
+
this.id = id;
|
|
24
|
+
this.vadContext = vadContext;
|
|
25
|
+
this.reactContext = reactContext;
|
|
26
|
+
}
|
|
27
|
+
|
|
28
|
+
public WritableArray detectSpeech(String audioDataBase64, ReadableMap options) throws Exception {
|
|
29
|
+
if (vadContext == 0) {
|
|
30
|
+
throw new Exception("VAD context is null");
|
|
31
|
+
}
|
|
32
|
+
|
|
33
|
+
// Decode base64 audio data to float array
|
|
34
|
+
byte[] audioBytes = Base64.decode(audioDataBase64, Base64.DEFAULT);
|
|
35
|
+
int numSamples = audioBytes.length / 4; // 4 bytes per float
|
|
36
|
+
float[] audioData = new float[numSamples];
|
|
37
|
+
|
|
38
|
+
for (int i = 0; i < numSamples; i++) {
|
|
39
|
+
int intBits = (audioBytes[i * 4] & 0xFF) |
|
|
40
|
+
((audioBytes[i * 4 + 1] & 0xFF) << 8) |
|
|
41
|
+
((audioBytes[i * 4 + 2] & 0xFF) << 16) |
|
|
42
|
+
((audioBytes[i * 4 + 3] & 0xFF) << 24);
|
|
43
|
+
audioData[i] = Float.intBitsToFloat(intBits);
|
|
44
|
+
}
|
|
45
|
+
|
|
46
|
+
return processVadDetection(audioData, numSamples, options);
|
|
47
|
+
}
|
|
48
|
+
|
|
49
|
+
public WritableArray detectSpeechFile(String filePathOrBase64, ReadableMap options) throws Exception {
|
|
50
|
+
if (vadContext == 0) {
|
|
51
|
+
throw new Exception("VAD context is null");
|
|
52
|
+
}
|
|
53
|
+
|
|
54
|
+
// Follow the same pattern as transcribeFile
|
|
55
|
+
String filePath = filePathOrBase64;
|
|
56
|
+
|
|
57
|
+
// Handle HTTP downloads
|
|
58
|
+
if (filePathOrBase64.startsWith("http://") || filePathOrBase64.startsWith("https://")) {
|
|
59
|
+
// Note: This would require access to the downloader, but for now we'll throw an error
|
|
60
|
+
throw new Exception("HTTP URLs not supported in VAD file detection. Please download the file first.");
|
|
61
|
+
}
|
|
62
|
+
|
|
63
|
+
float[] audioData;
|
|
64
|
+
|
|
65
|
+
// Check for resource identifier (bundled assets)
|
|
66
|
+
int resId = getResourceIdentifier(filePath);
|
|
67
|
+
if (resId > 0) {
|
|
68
|
+
audioData = AudioUtils.decodeWaveFile(reactContext.getResources().openRawResource(resId));
|
|
69
|
+
} else if (filePathOrBase64.startsWith("data:audio/wav;base64,")) {
|
|
70
|
+
// Handle base64 WAV data
|
|
71
|
+
audioData = AudioUtils.decodeWaveData(filePathOrBase64);
|
|
72
|
+
} else {
|
|
73
|
+
// Handle regular file path
|
|
74
|
+
audioData = AudioUtils.decodeWaveFile(new java.io.FileInputStream(new java.io.File(filePath)));
|
|
75
|
+
}
|
|
76
|
+
|
|
77
|
+
if (audioData == null) {
|
|
78
|
+
throw new Exception("Failed to load audio file: " + filePathOrBase64);
|
|
79
|
+
}
|
|
80
|
+
|
|
81
|
+
return processVadDetection(audioData, audioData.length, options);
|
|
82
|
+
}
|
|
83
|
+
|
|
84
|
+
public WritableArray detectSpeechWithAudioData(float[] audioData, ReadableMap options) throws Exception {
|
|
85
|
+
if (vadContext == 0) {
|
|
86
|
+
throw new Exception("VAD context is null");
|
|
87
|
+
}
|
|
88
|
+
|
|
89
|
+
return processVadDetection(audioData, audioData.length, options);
|
|
90
|
+
}
|
|
91
|
+
|
|
92
|
+
private int getResourceIdentifier(String filePath) {
|
|
93
|
+
int identifier = reactContext.getResources().getIdentifier(
|
|
94
|
+
filePath,
|
|
95
|
+
"drawable",
|
|
96
|
+
reactContext.getPackageName()
|
|
97
|
+
);
|
|
98
|
+
if (identifier == 0) {
|
|
99
|
+
identifier = reactContext.getResources().getIdentifier(
|
|
100
|
+
filePath,
|
|
101
|
+
"raw",
|
|
102
|
+
reactContext.getPackageName()
|
|
103
|
+
);
|
|
104
|
+
}
|
|
105
|
+
return identifier;
|
|
106
|
+
}
|
|
107
|
+
|
|
108
|
+
private WritableArray processVadDetection(float[] audioData, int numSamples, ReadableMap options) throws Exception {
|
|
109
|
+
// Run VAD detection using WhisperContext static methods
|
|
110
|
+
boolean speechDetected = WhisperContext.vadDetectSpeech(vadContext, audioData, numSamples);
|
|
111
|
+
if (!speechDetected) {
|
|
112
|
+
return Arguments.createArray();
|
|
113
|
+
}
|
|
114
|
+
|
|
115
|
+
// Set VAD parameters from options
|
|
116
|
+
float threshold = options.hasKey("threshold") ? (float) options.getDouble("threshold") : 0.5f;
|
|
117
|
+
int minSpeechDurationMs = options.hasKey("minSpeechDurationMs") ? options.getInt("minSpeechDurationMs") : 250;
|
|
118
|
+
int minSilenceDurationMs = options.hasKey("minSilenceDurationMs") ? options.getInt("minSilenceDurationMs") : 100;
|
|
119
|
+
float maxSpeechDurationS = options.hasKey("maxSpeechDurationS") ? (float) options.getDouble("maxSpeechDurationS") : 30.0f;
|
|
120
|
+
int speechPadMs = options.hasKey("speechPadMs") ? options.getInt("speechPadMs") : 30;
|
|
121
|
+
float samplesOverlap = options.hasKey("samplesOverlap") ? (float) options.getDouble("samplesOverlap") : 0.1f;
|
|
122
|
+
|
|
123
|
+
// Get segments from VAD using WhisperContext static methods
|
|
124
|
+
long segments = WhisperContext.vadGetSegmentsFromProbs(vadContext, threshold, minSpeechDurationMs,
|
|
125
|
+
minSilenceDurationMs, maxSpeechDurationS,
|
|
126
|
+
speechPadMs, samplesOverlap);
|
|
127
|
+
if (segments == 0) {
|
|
128
|
+
return Arguments.createArray();
|
|
129
|
+
}
|
|
130
|
+
|
|
131
|
+
// Convert segments to WritableArray using WhisperContext static methods
|
|
132
|
+
WritableArray result = Arguments.createArray();
|
|
133
|
+
int nSegments = WhisperContext.vadGetNSegments(segments);
|
|
134
|
+
|
|
135
|
+
for (int i = 0; i < nSegments; i++) {
|
|
136
|
+
float t0 = WhisperContext.vadGetSegmentT0(segments, i);
|
|
137
|
+
float t1 = WhisperContext.vadGetSegmentT1(segments, i);
|
|
138
|
+
|
|
139
|
+
WritableMap segment = Arguments.createMap();
|
|
140
|
+
segment.putDouble("t0", t0);
|
|
141
|
+
segment.putDouble("t1", t1);
|
|
142
|
+
result.pushMap(segment);
|
|
143
|
+
}
|
|
144
|
+
|
|
145
|
+
// Clean up using WhisperContext static methods
|
|
146
|
+
WhisperContext.vadFreeSegments(segments);
|
|
147
|
+
|
|
148
|
+
return result;
|
|
149
|
+
}
|
|
150
|
+
|
|
151
|
+
public void release() {
|
|
152
|
+
if (vadContext != 0) {
|
|
153
|
+
WhisperContext.freeVadContext(vadContext);
|
|
154
|
+
vadContext = 0;
|
|
155
|
+
}
|
|
156
|
+
}
|
|
157
|
+
}
|
package/android/src/main/jni.cpp
CHANGED
|
@@ -148,6 +148,47 @@ static struct whisper_context *whisper_init_from_asset(
|
|
|
148
148
|
return whisper_init_with_params(&loader, cparams);
|
|
149
149
|
}
|
|
150
150
|
|
|
151
|
+
// VAD context initialization functions
|
|
152
|
+
/**
 * Creates a VAD context by reading the model through a Java input stream.
 *
 * A heap-allocated input_stream_context carrying the JNI env and a global
 * reference to the stream is handed to the loader callbacks.
 * NOTE(review): presumably input_stream_close releases both; that helper is
 * not visible here, so confirm.
 *
 * @param env          JNI environment of the calling thread
 * @param input_stream Java stream object (PushbackInputStream) with the model
 * @param vad_params   VAD context parameters passed through unchanged
 * @return the value returned by whisper_vad_init_with_params
 */
static struct whisper_vad_context *whisper_vad_init_from_input_stream(
    JNIEnv *env,
    jobject input_stream, // PushbackInputStream
    struct whisper_vad_context_params vad_params
) {
    auto *stream_ctx = new input_stream_context;
    stream_ctx->env = env;
    stream_ctx->input_stream = env->NewGlobalRef(input_stream);

    whisper_model_loader loader = {};
    loader.context = stream_ctx;
    loader.read = &input_stream_read;
    loader.eof = &input_stream_is_eof;
    loader.close = &input_stream_close;

    return whisper_vad_init_with_params(&loader, vad_params);
}
|
|
169
|
+
|
|
170
|
+
/**
 * Creates a VAD context from a model stored in the Android asset bundle.
 *
 * @param env          JNI environment of the calling thread
 * @param assetManager Java AssetManager object
 * @param asset_path   path of the model inside the asset bundle
 * @param vad_params   VAD context parameters passed through unchanged
 * @return the value returned by whisper_vad_init_with_params, or NULL when
 *         the asset cannot be opened
 */
static struct whisper_vad_context *whisper_vad_init_from_asset(
    JNIEnv *env,
    jobject assetManager,
    const char *asset_path,
    struct whisper_vad_context_params vad_params
) {
    LOGI("Loading VAD model from asset '%s'\n", asset_path);

    AAssetManager *native_manager = AAssetManager_fromJava(env, assetManager);
    AAsset *model_asset = AAssetManager_open(native_manager, asset_path, AASSET_MODE_STREAMING);
    if (model_asset == NULL) {
        LOGW("Failed to open VAD asset '%s'\n", asset_path);
        return NULL;
    }

    // The opened asset becomes the loader's context for the asset_* callbacks.
    whisper_model_loader loader = {};
    loader.context = model_asset;
    loader.read = &asset_read;
    loader.eof = &asset_is_eof;
    loader.close = &asset_close;

    return whisper_vad_init_with_params(&loader, vad_params);
}
|
|
191
|
+
|
|
151
192
|
extern "C" {
|
|
152
193
|
|
|
153
194
|
JNIEXPORT jlong JNICALL
|
|
@@ -155,7 +196,10 @@ Java_com_rnwhisper_WhisperContext_initContext(
|
|
|
155
196
|
JNIEnv *env, jobject thiz, jstring model_path_str) {
|
|
156
197
|
UNUSED(thiz);
|
|
157
198
|
struct whisper_context_params cparams;
|
|
199
|
+
|
|
200
|
+
// TODO: Expose dtw_token_timestamps and dtw_aheads_preset
|
|
158
201
|
cparams.dtw_token_timestamps = false;
|
|
202
|
+
// cparams.dtw_aheads_preset = WHISPER_AHEADS_BASE;
|
|
159
203
|
|
|
160
204
|
struct whisper_context *context = nullptr;
|
|
161
205
|
const char *model_path_chars = env->GetStringUTFChars(model_path_str, nullptr);
|
|
@@ -173,7 +217,10 @@ Java_com_rnwhisper_WhisperContext_initContextWithAsset(
|
|
|
173
217
|
) {
|
|
174
218
|
UNUSED(thiz);
|
|
175
219
|
struct whisper_context_params cparams;
|
|
220
|
+
|
|
221
|
+
// TODO: Expose dtw_token_timestamps and dtw_aheads_preset
|
|
176
222
|
cparams.dtw_token_timestamps = false;
|
|
223
|
+
// cparams.dtw_aheads_preset = WHISPER_AHEADS_BASE;
|
|
177
224
|
|
|
178
225
|
struct whisper_context *context = nullptr;
|
|
179
226
|
const char *model_path_chars = env->GetStringUTFChars(model_path_str, nullptr);
|
|
@@ -190,7 +237,10 @@ Java_com_rnwhisper_WhisperContext_initContextWithInputStream(
|
|
|
190
237
|
) {
|
|
191
238
|
UNUSED(thiz);
|
|
192
239
|
struct whisper_context_params cparams;
|
|
240
|
+
|
|
241
|
+
// TODO: Expose dtw_token_timestamps and dtw_aheads_preset
|
|
193
242
|
cparams.dtw_token_timestamps = false;
|
|
243
|
+
// cparams.dtw_aheads_preset = WHISPER_AHEADS_BASE;
|
|
194
244
|
|
|
195
245
|
struct whisper_context *context = nullptr;
|
|
196
246
|
context = whisper_init_from_input_stream(env, input_stream, cparams);
|
|
@@ -521,4 +571,159 @@ Java_com_rnwhisper_WhisperContext_bench(
|
|
|
521
571
|
return env->NewStringUTF(result.c_str());
|
|
522
572
|
}
|
|
523
573
|
|
|
574
|
+
// VAD Context JNI implementations
|
|
575
|
+
/**
 * JNI entry point: loads a VAD model from a file path.
 *
 * @param model_path_str Java string with the model file path
 * @return the native VAD context pointer as a jlong, or 0 when the path is
 *         null, the path characters cannot be obtained, or loading fails
 */
JNIEXPORT jlong JNICALL
Java_com_rnwhisper_WhisperContext_initVadContext(
    JNIEnv *env,
    jobject thiz,
    jstring model_path_str
) {
    UNUSED(thiz);
    if (model_path_str == nullptr) {
        return 0;
    }

    const char *model_path_chars = env->GetStringUTFChars(model_path_str, nullptr);
    if (model_path_chars == nullptr) {
        // GetStringUTFChars returns NULL when it cannot allocate the copy;
        // never hand a NULL path to the loader.
        return 0;
    }

    struct whisper_vad_context_params vad_params = whisper_vad_default_context_params();
    struct whisper_vad_context *vad_context =
        whisper_vad_init_from_file_with_params(model_path_chars, vad_params);

    env->ReleaseStringUTFChars(model_path_str, model_path_chars);
    return reinterpret_cast<jlong>(vad_context);
}
|
|
590
|
+
|
|
591
|
+
/**
 * JNI entry point: loads a VAD model from the Android asset bundle.
 *
 * @param asset_manager  Java AssetManager object
 * @param model_path_str Java string with the asset path of the model
 * @return the native VAD context pointer as a jlong, or 0 when the path is
 *         null, the path characters cannot be obtained, or loading fails
 */
JNIEXPORT jlong JNICALL
Java_com_rnwhisper_WhisperContext_initVadContextWithAsset(
    JNIEnv *env,
    jobject thiz,
    jobject asset_manager,
    jstring model_path_str
) {
    UNUSED(thiz);
    if (model_path_str == nullptr) {
        return 0;
    }

    const char *model_path_chars = env->GetStringUTFChars(model_path_str, nullptr);
    if (model_path_chars == nullptr) {
        // GetStringUTFChars returns NULL when it cannot allocate the copy;
        // never hand a NULL path to the asset loader.
        return 0;
    }

    struct whisper_vad_context_params vad_params = whisper_vad_default_context_params();
    struct whisper_vad_context *vad_context =
        whisper_vad_init_from_asset(env, asset_manager, model_path_chars, vad_params);

    env->ReleaseStringUTFChars(model_path_str, model_path_chars);
    return reinterpret_cast<jlong>(vad_context);
}
|
|
607
|
+
|
|
608
|
+
/**
 * JNI entry point: loads a VAD model by reading it from a Java input stream,
 * using the default VAD context parameters.
 *
 * @param input_stream Java stream object that supplies the model bytes
 * @return the native VAD context pointer as a jlong
 */
JNIEXPORT jlong JNICALL
Java_com_rnwhisper_WhisperContext_initVadContextWithInputStream(
    JNIEnv *env,
    jobject thiz,
    jobject input_stream
) {
    UNUSED(thiz);

    const struct whisper_vad_context_params default_params = whisper_vad_default_context_params();
    struct whisper_vad_context *loaded =
        whisper_vad_init_from_input_stream(env, input_stream, default_params);

    return reinterpret_cast<jlong>(loaded);
}
|
|
621
|
+
|
|
622
|
+
/**
 * JNI entry point: releases a native VAD context.
 *
 * @param vad_context_ptr native pointer previously returned by one of the
 *                        initVadContext* entry points
 */
JNIEXPORT void JNICALL
Java_com_rnwhisper_WhisperContext_freeVadContext(
    JNIEnv *env,
    jobject thiz,
    jlong vad_context_ptr
) {
    UNUSED(thiz);
    UNUSED(env);

    auto *handle = reinterpret_cast<struct whisper_vad_context *>(vad_context_ptr);
    whisper_vad_free(handle);
}
|
|
633
|
+
|
|
634
|
+
/**
 * JNI entry point: runs VAD over a float sample buffer.
 *
 * The requested sample count is clamped to [0, array length] so the native
 * code can never read past the end of the Java array.
 *
 * @param vad_context_ptr native VAD context pointer
 * @param audio_data      Java float array with the samples
 * @param n_samples       number of samples to process from the start of the array
 * @return the result of whisper_vad_detect_speech, or JNI_FALSE when the
 *         context or array is null or the array elements cannot be obtained
 */
JNIEXPORT jboolean JNICALL
Java_com_rnwhisper_WhisperContext_vadDetectSpeech(
    JNIEnv *env,
    jobject thiz,
    jlong vad_context_ptr,
    jfloatArray audio_data,
    jint n_samples
) {
    UNUSED(thiz);
    struct whisper_vad_context *vad_context = reinterpret_cast<struct whisper_vad_context *>(vad_context_ptr);
    if (vad_context == nullptr || audio_data == nullptr) {
        return JNI_FALSE;
    }

    // Never trust the caller-supplied count beyond what the array really holds.
    const jsize available = env->GetArrayLength(audio_data);
    jint sample_count = n_samples;
    if (sample_count > available) {
        sample_count = available;
    }
    if (sample_count < 0) {
        sample_count = 0;
    }

    jfloat *audio_data_arr = env->GetFloatArrayElements(audio_data, nullptr);
    if (audio_data_arr == nullptr) {
        // Elements could not be pinned or copied.
        return JNI_FALSE;
    }

    bool result = whisper_vad_detect_speech(vad_context, audio_data_arr, sample_count);

    // JNI_ABORT: the samples were only read, so nothing needs copying back.
    env->ReleaseFloatArrayElements(audio_data, audio_data_arr, JNI_ABORT);

    return result ? JNI_TRUE : JNI_FALSE;
}
|
|
651
|
+
|
|
652
|
+
/**
 * JNI entry point: builds the speech segment list from a VAD context using
 * the supplied tuning values on top of whisper_vad_default_params().
 *
 * @return the native segment list pointer as a jlong (release it with
 *         vadFreeSegments), or 0 when the context pointer is null
 */
JNIEXPORT jlong JNICALL
Java_com_rnwhisper_WhisperContext_vadGetSegmentsFromProbs(
    JNIEnv *env,
    jobject thiz,
    jlong vad_context_ptr,
    jfloat threshold,
    jint min_speech_duration_ms,
    jint min_silence_duration_ms,
    jfloat max_speech_duration_s,
    jint speech_pad_ms,
    jfloat samples_overlap
) {
    UNUSED(env);
    UNUSED(thiz);
    struct whisper_vad_context *vad_context = reinterpret_cast<struct whisper_vad_context *>(vad_context_ptr);
    if (vad_context == nullptr) {
        // The Java caller treats 0 as "no segments".
        return 0;
    }

    struct whisper_vad_params vad_params = whisper_vad_default_params();
    vad_params.threshold = threshold;
    vad_params.min_speech_duration_ms = min_speech_duration_ms;
    vad_params.min_silence_duration_ms = min_silence_duration_ms;
    vad_params.max_speech_duration_s = max_speech_duration_s;
    vad_params.speech_pad_ms = speech_pad_ms;
    vad_params.samples_overlap = samples_overlap;

    struct whisper_vad_segments *segments = whisper_vad_segments_from_probs(vad_context, vad_params);
    return reinterpret_cast<jlong>(segments);
}
|
|
678
|
+
|
|
679
|
+
/**
 * JNI entry point: reports how many segments a native segment list holds.
 *
 * @param segments_ptr native segment list pointer
 * @return the value of whisper_vad_segments_n_segments for that list
 */
JNIEXPORT jint JNICALL
Java_com_rnwhisper_WhisperContext_vadGetNSegments(
    JNIEnv *env,
    jobject thiz,
    jlong segments_ptr
) {
    UNUSED(thiz);
    UNUSED(env);

    auto *segment_list = reinterpret_cast<struct whisper_vad_segments *>(segments_ptr);
    return whisper_vad_segments_n_segments(segment_list);
}
|
|
690
|
+
|
|
691
|
+
/**
 * JNI entry point: returns the t0 value of one segment.
 *
 * @param segments_ptr native segment list pointer
 * @param index        position of the segment in the list
 * @return the value of whisper_vad_segments_get_segment_t0 for that segment
 */
JNIEXPORT jfloat JNICALL
Java_com_rnwhisper_WhisperContext_vadGetSegmentT0(
    JNIEnv *env,
    jobject thiz,
    jlong segments_ptr,
    jint index
) {
    UNUSED(thiz);
    UNUSED(env);

    auto *segment_list = reinterpret_cast<struct whisper_vad_segments *>(segments_ptr);
    return whisper_vad_segments_get_segment_t0(segment_list, index);
}
|
|
703
|
+
|
|
704
|
+
/**
 * JNI entry point: returns the t1 value of one segment.
 *
 * @param segments_ptr native segment list pointer
 * @param index        position of the segment in the list
 * @return the value of whisper_vad_segments_get_segment_t1 for that segment
 */
JNIEXPORT jfloat JNICALL
Java_com_rnwhisper_WhisperContext_vadGetSegmentT1(
    JNIEnv *env,
    jobject thiz,
    jlong segments_ptr,
    jint index
) {
    UNUSED(thiz);
    UNUSED(env);

    auto *segment_list = reinterpret_cast<struct whisper_vad_segments *>(segments_ptr);
    return whisper_vad_segments_get_segment_t1(segment_list, index);
}
|
|
716
|
+
|
|
717
|
+
/**
 * JNI entry point: releases a native segment list.
 *
 * @param segments_ptr native segment list pointer obtained from
 *                     vadGetSegmentsFromProbs
 */
JNIEXPORT void JNICALL
Java_com_rnwhisper_WhisperContext_vadFreeSegments(
    JNIEnv *env,
    jobject thiz,
    jlong segments_ptr
) {
    UNUSED(thiz);
    UNUSED(env);

    auto *segment_list = reinterpret_cast<struct whisper_vad_segments *>(segments_ptr);
    whisper_vad_free_segments(segment_list);
}
|
|
728
|
+
|
|
524
729
|
} // extern "C"
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
@@ -77,6 +77,32 @@ public class RNWhisperModule extends NativeRNWhisperSpec {
|
|
|
77
77
|
rnwhisper.releaseAllContexts(promise);
|
|
78
78
|
}
|
|
79
79
|
|
|
80
|
+
// VAD methods
|
|
81
|
+
@ReactMethod
|
|
82
|
+
public void initVadContext(final ReadableMap options, final Promise promise) {
|
|
83
|
+
rnwhisper.initVadContext(options, promise);
|
|
84
|
+
}
|
|
85
|
+
|
|
86
|
+
@ReactMethod
|
|
87
|
+
public void vadDetectSpeech(double id, String audioDataBase64, ReadableMap options, Promise promise) {
|
|
88
|
+
rnwhisper.vadDetectSpeech(id, audioDataBase64, options, promise);
|
|
89
|
+
}
|
|
90
|
+
|
|
91
|
+
@ReactMethod
|
|
92
|
+
public void vadDetectSpeechFile(double id, String filePath, ReadableMap options, Promise promise) {
|
|
93
|
+
rnwhisper.vadDetectSpeechFile(id, filePath, options, promise);
|
|
94
|
+
}
|
|
95
|
+
|
|
96
|
+
@ReactMethod
|
|
97
|
+
public void releaseVadContext(double id, Promise promise) {
|
|
98
|
+
rnwhisper.releaseVadContext(id, promise);
|
|
99
|
+
}
|
|
100
|
+
|
|
101
|
+
@ReactMethod
|
|
102
|
+
public void releaseAllVadContexts(Promise promise) {
|
|
103
|
+
rnwhisper.releaseAllVadContexts(promise);
|
|
104
|
+
}
|
|
105
|
+
|
|
80
106
|
/*
|
|
81
107
|
* iOS-specific methods, left here to make the turbo module happy:
|
|
82
108
|
*/
|
|
@@ -76,4 +76,30 @@ public class RNWhisperModule extends ReactContextBaseJavaModule {
|
|
|
76
76
|
public void releaseAllContexts(Promise promise) {
|
|
77
77
|
rnwhisper.releaseAllContexts(promise);
|
|
78
78
|
}
|
|
79
|
+
|
|
80
|
+
// VAD methods
|
|
81
|
+
@ReactMethod
|
|
82
|
+
public void initVadContext(final ReadableMap options, final Promise promise) {
|
|
83
|
+
rnwhisper.initVadContext(options, promise);
|
|
84
|
+
}
|
|
85
|
+
|
|
86
|
+
@ReactMethod
|
|
87
|
+
public void vadDetectSpeech(double id, String audioDataBase64, ReadableMap options, Promise promise) {
|
|
88
|
+
rnwhisper.vadDetectSpeech(id, audioDataBase64, options, promise);
|
|
89
|
+
}
|
|
90
|
+
|
|
91
|
+
@ReactMethod
|
|
92
|
+
public void vadDetectSpeechFile(double id, String filePath, ReadableMap options, Promise promise) {
|
|
93
|
+
rnwhisper.vadDetectSpeechFile(id, filePath, options, promise);
|
|
94
|
+
}
|
|
95
|
+
|
|
96
|
+
@ReactMethod
|
|
97
|
+
public void releaseVadContext(double id, Promise promise) {
|
|
98
|
+
rnwhisper.releaseVadContext(id, promise);
|
|
99
|
+
}
|
|
100
|
+
|
|
101
|
+
@ReactMethod
|
|
102
|
+
public void releaseAllVadContexts(Promise promise) {
|
|
103
|
+
rnwhisper.releaseAllVadContexts(promise);
|
|
104
|
+
}
|
|
79
105
|
}
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
#import <CoreML/CoreML.h>
|
|
2
|
+
|
|
3
|
+
/// Declares the completion-handler based prediction methods on MLModel.
/// The matching implementation file provides bodies for them only when the
/// build SDK is older than macOS 14.
@interface MLModel (Compat)

/// Runs a prediction for `input` and reports the result through `completionHandler`.
/// @param input The features to predict from.
/// @param completionHandler Block receiving the prediction output and any error.
- (void)predictionFromFeatures:(id<MLFeatureProvider>)input
             completionHandler:(void (^)(id<MLFeatureProvider> output, NSError *error))completionHandler;

/// Runs a prediction for `input` with `options` and reports the result through `completionHandler`.
/// @param input The features to predict from.
/// @param options Prediction options forwarded to the model.
/// @param completionHandler Block receiving the prediction output and any error.
- (void)predictionFromFeatures:(id<MLFeatureProvider>)input
                       options:(MLPredictionOptions *)options
             completionHandler:(void (^)(id<MLFeatureProvider> output, NSError *error))completionHandler;

@end
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
#import "whisper-compat.h"
|
|
2
|
+
#import <Foundation/Foundation.h>
|
|
3
|
+
|
|
4
|
+
/// Fallback bodies for MLModel's completion-handler prediction methods.
/// They are compiled only when the build SDK is older than macOS 14; each one
/// runs the synchronous prediction on a freshly created operation queue and
/// then delivers the result on the main queue.
@implementation MLModel (Compat)

#if !defined(MAC_OS_X_VERSION_14_00) || MAC_OS_X_VERSION_MAX_ALLOWED < MAC_OS_X_VERSION_14_00

/// Runs `predictionFromFeatures:error:` off the calling thread.
/// @param input The features to predict from.
/// @param completionHandler Invoked on the main queue with the prediction and
///        the error written by the synchronous call (nil if none was written).
- (void) predictionFromFeatures:(id<MLFeatureProvider>) input
              completionHandler:(void (^)(id<MLFeatureProvider> output, NSError * error)) completionHandler {
    NSOperationQueue *workQueue = [[NSOperationQueue alloc] init];
    [workQueue addOperationWithBlock:^{
        NSError *error = nil;
        id<MLFeatureProvider> prediction = [self predictionFromFeatures:input error:&error];

        // Invoking a nil block crashes, so there is nothing to deliver without a handler.
        if (completionHandler == nil) {
            return;
        }

        // NOTE(review): delivery is on the main queue, so a caller that blocks the
        // main thread while waiting for this handler will never see it run.
        [NSOperationQueue.mainQueue addOperationWithBlock:^{
            completionHandler(prediction, error);
        }];
    }];
}

/// Runs `predictionFromFeatures:options:error:` off the calling thread.
/// @param input The features to predict from.
/// @param options Prediction options forwarded to the synchronous call.
/// @param completionHandler Invoked on the main queue with the prediction and
///        the error written by the synchronous call (nil if none was written).
- (void) predictionFromFeatures:(id<MLFeatureProvider>) input
                        options:(MLPredictionOptions *) options
              completionHandler:(void (^)(id<MLFeatureProvider> output, NSError * error)) completionHandler {
    NSOperationQueue *workQueue = [[NSOperationQueue alloc] init];
    [workQueue addOperationWithBlock:^{
        NSError *error = nil;
        id<MLFeatureProvider> prediction = [self predictionFromFeatures:input options:options error:&error];

        // Invoking a nil block crashes, so there is nothing to deliver without a handler.
        if (completionHandler == nil) {
            return;
        }

        // NOTE(review): delivery is on the main queue, so a caller that blocks the
        // main thread while waiting for this handler will never see it run.
        [NSOperationQueue.mainQueue addOperationWithBlock:^{
            completionHandler(prediction, error);
        }];
    }];
}

#endif

@end
|
|
@@ -11,36 +11,33 @@
|
|
|
11
11
|
|
|
12
12
|
NS_ASSUME_NONNULL_BEGIN
|
|
13
13
|
|
|
14
|
-
|
|
15
14
|
/// Model Prediction Input Type
|
|
16
|
-
API_AVAILABLE(macos(
|
|
15
|
+
API_AVAILABLE(macos(10.15), ios(13.0), watchos(6.0), tvos(13.0)) __attribute__((visibility("hidden")))
|
|
17
16
|
@interface whisper_decoder_implInput : NSObject<MLFeatureProvider>
|
|
18
17
|
|
|
19
|
-
/// token_data as 1 by 1 matrix of
|
|
18
|
+
/// token_data as 1 by 1 matrix of floats
|
|
20
19
|
@property (readwrite, nonatomic, strong) MLMultiArray * token_data;
|
|
21
20
|
|
|
22
|
-
/// audio_data as 1 ×
|
|
21
|
+
/// audio_data as 1 × 1500 × 384 3-dimensional array of floats
|
|
23
22
|
@property (readwrite, nonatomic, strong) MLMultiArray * audio_data;
|
|
24
23
|
- (instancetype)init NS_UNAVAILABLE;
|
|
25
24
|
- (instancetype)initWithToken_data:(MLMultiArray *)token_data audio_data:(MLMultiArray *)audio_data NS_DESIGNATED_INITIALIZER;
|
|
26
25
|
|
|
27
26
|
@end
|
|
28
27
|
|
|
29
|
-
|
|
30
28
|
/// Model Prediction Output Type
|
|
31
|
-
API_AVAILABLE(macos(
|
|
29
|
+
API_AVAILABLE(macos(10.15), ios(13.0), watchos(6.0), tvos(13.0)) __attribute__((visibility("hidden")))
|
|
32
30
|
@interface whisper_decoder_implOutput : NSObject<MLFeatureProvider>
|
|
33
31
|
|
|
34
|
-
///
|
|
35
|
-
@property (readwrite, nonatomic, strong) MLMultiArray *
|
|
32
|
+
/// cast_76 as multidimensional array of floats
|
|
33
|
+
@property (readwrite, nonatomic, strong) MLMultiArray * cast_76;
|
|
36
34
|
- (instancetype)init NS_UNAVAILABLE;
|
|
37
|
-
- (instancetype)
|
|
35
|
+
- (instancetype)initWithCast_76:(MLMultiArray *)cast_76 NS_DESIGNATED_INITIALIZER;
|
|
38
36
|
|
|
39
37
|
@end
|
|
40
38
|
|
|
41
|
-
|
|
42
39
|
/// Class for model loading and prediction
|
|
43
|
-
API_AVAILABLE(macos(
|
|
40
|
+
API_AVAILABLE(macos(10.15), ios(13.0), watchos(6.0), tvos(13.0)) __attribute__((visibility("hidden")))
|
|
44
41
|
@interface whisper_decoder_impl : NSObject
|
|
45
42
|
@property (readonly, nonatomic, nullable) MLModel * model;
|
|
46
43
|
|
|
@@ -94,7 +91,7 @@ API_AVAILABLE(macos(12.0), ios(15.0), watchos(8.0), tvos(15.0)) __attribute__((v
|
|
|
94
91
|
@param configuration The model configuration
|
|
95
92
|
@param handler When the model load completes successfully or unsuccessfully, the completion handler is invoked with a valid whisper_decoder_impl instance or NSError object.
|
|
96
93
|
*/
|
|
97
|
-
+ (void)loadWithConfiguration:(MLModelConfiguration *)configuration completionHandler:(void (^)(whisper_decoder_impl * _Nullable model, NSError * _Nullable error))handler;
|
|
94
|
+
+ (void)loadWithConfiguration:(MLModelConfiguration *)configuration completionHandler:(void (^)(whisper_decoder_impl * _Nullable model, NSError * _Nullable error))handler API_AVAILABLE(macos(11.0), ios(14.0), watchos(7.0), tvos(14.0)) __attribute__((visibility("hidden")));
|
|
98
95
|
|
|
99
96
|
/**
|
|
100
97
|
Construct whisper_decoder_impl instance asynchronously with URL of .mlmodelc directory and optional configuration.
|
|
@@ -105,7 +102,7 @@ API_AVAILABLE(macos(12.0), ios(15.0), watchos(8.0), tvos(15.0)) __attribute__((v
|
|
|
105
102
|
@param configuration The model configuration
|
|
106
103
|
@param handler When the model load completes successfully or unsuccessfully, the completion handler is invoked with a valid whisper_decoder_impl instance or NSError object.
|
|
107
104
|
*/
|
|
108
|
-
+ (void)loadContentsOfURL:(NSURL *)modelURL configuration:(MLModelConfiguration *)configuration completionHandler:(void (^)(whisper_decoder_impl * _Nullable model, NSError * _Nullable error))handler;
|
|
105
|
+
+ (void)loadContentsOfURL:(NSURL *)modelURL configuration:(MLModelConfiguration *)configuration completionHandler:(void (^)(whisper_decoder_impl * _Nullable model, NSError * _Nullable error))handler API_AVAILABLE(macos(11.0), ios(14.0), watchos(7.0), tvos(14.0)) __attribute__((visibility("hidden")));
|
|
109
106
|
|
|
110
107
|
/**
|
|
111
108
|
Make a prediction using the standard interface
|
|
@@ -124,10 +121,25 @@ API_AVAILABLE(macos(12.0), ios(15.0), watchos(8.0), tvos(15.0)) __attribute__((v
|
|
|
124
121
|
*/
|
|
125
122
|
- (nullable whisper_decoder_implOutput *)predictionFromFeatures:(whisper_decoder_implInput *)input options:(MLPredictionOptions *)options error:(NSError * _Nullable __autoreleasing * _Nullable)error;
|
|
126
123
|
|
|
124
|
+
/**
|
|
125
|
+
Make an asynchronous prediction using the standard interface
|
|
126
|
+
@param input an instance of whisper_decoder_implInput to predict from
|
|
127
|
+
@param completionHandler a block that will be called upon completion of the prediction. error will be nil if no error occurred.
|
|
128
|
+
*/
|
|
129
|
+
- (void)predictionFromFeatures:(whisper_decoder_implInput *)input completionHandler:(void (^)(whisper_decoder_implOutput * _Nullable output, NSError * _Nullable error))completionHandler API_AVAILABLE(macos(14.0), ios(17.0), watchos(10.0), tvos(17.0)) __attribute__((visibility("hidden")));
|
|
130
|
+
|
|
131
|
+
/**
|
|
132
|
+
Make an asynchronous prediction using the standard interface
|
|
133
|
+
@param input an instance of whisper_decoder_implInput to predict from
|
|
134
|
+
@param options prediction options
|
|
135
|
+
@param completionHandler a block that will be called upon completion of the prediction. error will be nil if no error occurred.
|
|
136
|
+
*/
|
|
137
|
+
- (void)predictionFromFeatures:(whisper_decoder_implInput *)input options:(MLPredictionOptions *)options completionHandler:(void (^)(whisper_decoder_implOutput * _Nullable output, NSError * _Nullable error))completionHandler API_AVAILABLE(macos(14.0), ios(17.0), watchos(10.0), tvos(17.0)) __attribute__((visibility("hidden")));
|
|
138
|
+
|
|
127
139
|
/**
|
|
128
140
|
Make a prediction using the convenience interface
|
|
129
|
-
@param token_data
|
|
130
|
-
@param audio_data
|
|
141
|
+
@param token_data 1 by 1 matrix of floats
|
|
142
|
+
@param audio_data 1 × 1500 × 384 3-dimensional array of floats
|
|
131
143
|
@param error If an error occurs, upon return contains an NSError object that describes the problem. If you are not interested in possible errors, pass in NULL.
|
|
132
144
|
@return the prediction as whisper_decoder_implOutput
|
|
133
145
|
*/
|