whisper.rn 0.4.0 → 0.4.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +69 -0
- package/android/src/main/java/com/rnwhisper/RNWhisper.java +212 -0
- package/android/src/main/java/com/rnwhisper/WhisperContext.java +34 -4
- package/android/src/main/java/com/rnwhisper/WhisperVadContext.java +101 -0
- package/android/src/main/jni.cpp +196 -0
- package/android/src/main/jniLibs/arm64-v8a/librnwhisper.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/librnwhisper_v8fp16_va_2.so +0 -0
- package/android/src/main/jniLibs/armeabi-v7a/librnwhisper.so +0 -0
- package/android/src/main/jniLibs/armeabi-v7a/librnwhisper_vfpv4.so +0 -0
- package/android/src/main/jniLibs/x86_64/librnwhisper.so +0 -0
- package/android/src/main/jniLibs/x86_64/librnwhisper_x86_64.so +0 -0
- package/android/src/newarch/java/com/rnwhisper/RNWhisperModule.java +26 -0
- package/android/src/oldarch/java/com/rnwhisper/RNWhisperModule.java +26 -0
- package/ios/RNWhisper.mm +147 -0
- package/ios/RNWhisperContext.mm +18 -24
- package/ios/RNWhisperVadContext.h +29 -0
- package/ios/RNWhisperVadContext.mm +148 -0
- package/jest/mock.js +19 -0
- package/lib/commonjs/NativeRNWhisper.js.map +1 -1
- package/lib/commonjs/index.js +111 -1
- package/lib/commonjs/index.js.map +1 -1
- package/lib/module/NativeRNWhisper.js.map +1 -1
- package/lib/module/index.js +112 -0
- package/lib/module/index.js.map +1 -1
- package/lib/typescript/NativeRNWhisper.d.ts +35 -0
- package/lib/typescript/NativeRNWhisper.d.ts.map +1 -1
- package/lib/typescript/index.d.ts +39 -3
- package/lib/typescript/index.d.ts.map +1 -1
- package/package.json +1 -1
- package/src/NativeRNWhisper.ts +48 -0
- package/src/index.ts +132 -1
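Taken together, the diff below adds a Voice Activity Detection (VAD) layer across the whole stack: a TypeScript API (`initWhisperVad`), Java and Objective-C++ bridge methods, and JNI bindings into whisper.cpp's Silero VAD support. As orientation before the per-file hunks, here is a minimal sketch of the new flow pieced together from the README changes below; the model and audio paths are placeholders and error handling is reduced to a `finally`.

```typescript
import { initWhisperVad, releaseAllWhisperVad } from 'whisper.rn'

// Minimal sketch of the VAD flow added in this release (paths are placeholders).
async function findSpeechSegments() {
  const vad = await initWhisperVad({
    filePath: 'path/to/ggml-silero-v5.1.2.bin', // Silero VAD model
    useGpu: true, // iOS only; the Android bridge reports gpu: false ("Currently not supported")
    nThreads: 4,
  })
  try {
    const segments = await vad.detectSpeech('path/to/audio.wav', { threshold: 0.5 })
    segments.forEach(({ t0, t1 }, i) => console.log(`segment ${i}: ${t0} -> ${t1}`))
  } finally {
    await vad.release() // or releaseAllWhisperVad() to drop every VAD context
  }
}
```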
package/README.md
CHANGED
@@ -103,6 +103,75 @@ subscribe(evt => {
 })
 ```
 
+## Voice Activity Detection (VAD)
+
+Voice Activity Detection allows you to detect speech segments in audio data using the Silero VAD model.
+
+#### Initialize VAD Context
+
+```typescript
+import { initWhisperVad } from 'whisper.rn'
+
+const vadContext = await initWhisperVad({
+  filePath: require('./assets/ggml-silero-v5.1.2.bin'), // VAD model file
+  useGpu: true, // Use GPU acceleration (iOS only)
+  nThreads: 4, // Number of threads for processing
+})
+```
+
+#### Detect Speech Segments
+
+##### From Audio Files
+
+```typescript
+// Detect speech in audio file (supports same formats as transcribe)
+const segments = await vadContext.detectSpeech(require('./assets/audio.wav'), {
+  threshold: 0.5, // Speech probability threshold (0.0-1.0)
+  minSpeechDurationMs: 250, // Minimum speech duration in ms
+  minSilenceDurationMs: 100, // Minimum silence duration in ms
+  maxSpeechDurationS: 30, // Maximum speech duration in seconds
+  speechPadMs: 30, // Padding around speech segments in ms
+  samplesOverlap: 0.1, // Overlap between analysis windows
+})
+
+// Also supports:
+// - File paths: vadContext.detectSpeech('path/to/audio.wav', options)
+// - HTTP URLs: vadContext.detectSpeech('https://example.com/audio.wav', options)
+// - Base64 WAV: vadContext.detectSpeech('data:audio/wav;base64,...', options)
+// - Assets: vadContext.detectSpeech(require('./assets/audio.wav'), options)
+```
+
+##### From Raw Audio Data
+
+```typescript
+// Detect speech in base64 encoded float32 PCM data
+const segments = await vadContext.detectSpeechData(base64AudioData, {
+  threshold: 0.5,
+  minSpeechDurationMs: 250,
+  minSilenceDurationMs: 100,
+  maxSpeechDurationS: 30,
+  speechPadMs: 30,
+  samplesOverlap: 0.1,
+})
+```
+
+#### Process Results
+
+```typescript
+segments.forEach((segment, index) => {
+  console.log(`Segment ${index + 1}: ${segment.t0.toFixed(2)}s - ${segment.t1.toFixed(2)}s`)
+  console.log(`Duration: ${(segment.t1 - segment.t0).toFixed(2)}s`)
+})
+```
+
+#### Release VAD Context
+
+```typescript
+await vadContext.release()
+// Or release all VAD contexts
+await releaseAllWhisperVad()
+```
+
 In iOS, You may need to change the Audio Session so that it can be used with other audio playback, or to optimize the quality of the recording. So we have provided AudioSession utilities for you:
 
 Option 1 - Use options in transcribeRealtime:
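One gap in the README additions above: `detectSpeechData` expects base64-encoded 32-bit float PCM, but no encoding helper is shown. Below is a hypothetical, dependency-free helper (React Native provides neither `btoa` nor `Buffer` by default); it assumes the usual Whisper input framing of 16 kHz mono little-endian samples.

```typescript
const B64 = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/'

// Hypothetical helper: base64-encode the raw bytes of a Float32Array so the
// result can be passed to vadContext.detectSpeechData(). Assumes 16 kHz mono
// little-endian float32 PCM, matching Whisper's usual input format.
function float32ToBase64(samples: Float32Array): string {
  const bytes = new Uint8Array(samples.buffer, samples.byteOffset, samples.byteLength)
  let out = ''
  for (let i = 0; i < bytes.length; i += 3) {
    const b0 = bytes[i]
    const b1 = i + 1 < bytes.length ? bytes[i + 1] : 0
    const b2 = i + 2 < bytes.length ? bytes[i + 2] : 0
    out += B64[b0 >> 2] + B64[((b0 & 3) << 4) | (b1 >> 4)]
    out += i + 1 < bytes.length ? B64[((b1 & 15) << 2) | (b2 >> 6)] : '='
    out += i + 2 < bytes.length ? B64[b2 & 63] : '='
  }
  return out
}

// Usage: const segments = await vadContext.detectSpeechData(float32ToBase64(pcm), { threshold: 0.5 })
```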
package/android/src/main/java/com/rnwhisper/RNWhisper.java
CHANGED

@@ -13,6 +13,7 @@ import com.facebook.react.bridge.ReactMethod;
 import com.facebook.react.bridge.LifecycleEventListener;
 import com.facebook.react.bridge.ReadableMap;
 import com.facebook.react.bridge.WritableMap;
+import com.facebook.react.bridge.WritableArray;
 import com.facebook.react.bridge.Arguments;
 
 import java.util.HashMap;
@@ -47,6 +48,7 @@ public class RNWhisper implements LifecycleEventListener {
   private HashMap<AsyncTask, String> tasks = new HashMap<>();
 
   private HashMap<Integer, WhisperContext> contexts = new HashMap<>();
+  private HashMap<Integer, WhisperVadContext> vadContexts = new HashMap<>();
 
   private int getResourceIdentifier(String filePath) {
     int identifier = reactContext.getResources().getIdentifier(
@@ -344,6 +346,212 @@ public class RNWhisper implements LifecycleEventListener {
     tasks.put(task, "releaseAllContexts");
   }
 
+  public void initVadContext(final ReadableMap options, final Promise promise) {
+    AsyncTask task = new AsyncTask<Void, Void, Integer>() {
+      private Exception exception;
+
+      @Override
+      protected Integer doInBackground(Void... voids) {
+        try {
+          String modelPath = options.getString("filePath");
+          boolean isBundleAsset = options.getBoolean("isBundleAsset");
+
+          String modelFilePath = modelPath;
+          if (!isBundleAsset && (modelPath.startsWith("http://") || modelPath.startsWith("https://"))) {
+            modelFilePath = downloader.downloadFile(modelPath);
+          }
+
+          long vadContext;
+          int resId = getResourceIdentifier(modelFilePath);
+          if (resId > 0) {
+            vadContext = WhisperContext.initVadContextWithInputStream(
+              new PushbackInputStream(reactContext.getResources().openRawResource(resId))
+            );
+          } else if (isBundleAsset) {
+            vadContext = WhisperContext.initVadContextWithAsset(reactContext.getAssets(), modelFilePath);
+          } else {
+            vadContext = WhisperContext.initVadContext(modelFilePath);
+          }
+          if (vadContext == 0) {
+            throw new Exception("Failed to initialize VAD context");
+          }
+          int id = Math.abs(new Random().nextInt());
+          WhisperVadContext whisperVadContext = new WhisperVadContext(id, reactContext, vadContext);
+          vadContexts.put(id, whisperVadContext);
+          return id;
+        } catch (Exception e) {
+          exception = e;
+          return null;
+        }
+      }
+
+      @Override
+      protected void onPostExecute(Integer id) {
+        if (exception != null) {
+          promise.reject(exception);
+          return;
+        }
+        WritableMap result = Arguments.createMap();
+        result.putInt("contextId", id);
+        result.putBoolean("gpu", false);
+        result.putString("reasonNoGPU", "Currently not supported");
+        promise.resolve(result);
+        tasks.remove(this);
+      }
+    }.executeOnExecutor(AsyncTask.THREAD_POOL_EXECUTOR);
+    tasks.put(task, "initVadContext");
+  }
+
+  public void vadDetectSpeech(double id, String audioDataBase64, ReadableMap options, Promise promise) {
+    final WhisperVadContext vadContext = vadContexts.get((int) id);
+    if (vadContext == null) {
+      promise.reject("VAD context not found");
+      return;
+    }
+
+    AsyncTask task = new AsyncTask<Void, Void, WritableArray>() {
+      private Exception exception;
+
+      @Override
+      protected WritableArray doInBackground(Void... voids) {
+        try {
+          float[] audioData = AudioUtils.decodePcmData(audioDataBase64);
+          return vadContext.detectSpeechWithAudioData(audioData, audioData.length, options);
+        } catch (Exception e) {
+          exception = e;
+          return null;
+        }
+      }
+
+      @Override
+      protected void onPostExecute(WritableArray segments) {
+        if (exception != null) {
+          promise.reject(exception);
+          return;
+        }
+        promise.resolve(segments);
+        tasks.remove(this);
+      }
+    }.executeOnExecutor(AsyncTask.THREAD_POOL_EXECUTOR);
+    tasks.put(task, "vadDetectSpeech-" + id);
+  }
+
+  public void vadDetectSpeechFile(double id, String filePathOrBase64, ReadableMap options, Promise promise) {
+    final WhisperVadContext vadContext = vadContexts.get((int) id);
+    if (vadContext == null) {
+      promise.reject("VAD context not found");
+      return;
+    }
+
+    AsyncTask task = new AsyncTask<Void, Void, WritableArray>() {
+      private Exception exception;
+
+      @Override
+      protected WritableArray doInBackground(Void... voids) {
+        try {
+          // Handle file processing like transcribeFile does
+          String filePath = filePathOrBase64;
+          if (filePathOrBase64.startsWith("http://") || filePathOrBase64.startsWith("https://")) {
+            filePath = downloader.downloadFile(filePathOrBase64);
+          }
+
+          float[] audioData;
+          int resId = getResourceIdentifier(filePath);
+          if (resId > 0) {
+            audioData = AudioUtils.decodeWaveFile(reactContext.getResources().openRawResource(resId));
+          } else if (filePathOrBase64.startsWith("data:audio/wav;base64,")) {
+            audioData = AudioUtils.decodeWaveData(filePathOrBase64);
+          } else {
+            audioData = AudioUtils.decodeWaveFile(new java.io.FileInputStream(new java.io.File(filePath)));
+          }
+
+          if (audioData == null) {
+            throw new Exception("Failed to load audio file: " + filePathOrBase64);
+          }
+
+          return vadContext.detectSpeechWithAudioData(audioData, audioData.length, options);
+        } catch (Exception e) {
+          exception = e;
+          return null;
+        }
+      }
+
+      @Override
+      protected void onPostExecute(WritableArray segments) {
+        if (exception != null) {
+          promise.reject(exception);
+          return;
+        }
+        promise.resolve(segments);
+        tasks.remove(this);
+      }
+    }.executeOnExecutor(AsyncTask.THREAD_POOL_EXECUTOR);
+    tasks.put(task, "vadDetectSpeechFile-" + id);
+  }
+
+  public void releaseVadContext(double id, Promise promise) {
+    final int contextId = (int) id;
+    AsyncTask task = new AsyncTask<Void, Void, Void>() {
+      private Exception exception;
+
+      @Override
+      protected Void doInBackground(Void... voids) {
+        try {
+          WhisperVadContext vadContext = vadContexts.get(contextId);
+          if (vadContext == null) {
+            throw new Exception("VAD context " + id + " not found");
+          }
+          vadContext.release();
+          vadContexts.remove(contextId);
+        } catch (Exception e) {
+          exception = e;
+        }
+        return null;
+      }
+
+      @Override
+      protected void onPostExecute(Void result) {
+        if (exception != null) {
+          promise.reject(exception);
+          return;
+        }
+        promise.resolve(null);
+        tasks.remove(this);
+      }
+    }.executeOnExecutor(AsyncTask.THREAD_POOL_EXECUTOR);
+    tasks.put(task, "releaseVadContext-" + id);
+  }
+
+  public void releaseAllVadContexts(Promise promise) {
+    AsyncTask task = new AsyncTask<Void, Void, Void>() {
+      private Exception exception;
+
+      @Override
+      protected Void doInBackground(Void... voids) {
+        try {
+          for (WhisperVadContext vadContext : vadContexts.values()) {
+            vadContext.release();
+          }
+          vadContexts.clear();
+        } catch (Exception e) {
+          exception = e;
+        }
+        return null;
+      }
+
+      @Override
+      protected void onPostExecute(Void result) {
+        if (exception != null) {
+          promise.reject(exception);
+          return;
+        }
+        promise.resolve(null);
+        tasks.remove(this);
+      }
+    }.executeOnExecutor(AsyncTask.THREAD_POOL_EXECUTOR);
+    tasks.put(task, "releaseAllVadContexts");
+  }
+
   @Override
   public void onHostResume() {
   }
@@ -367,8 +575,12 @@ public class RNWhisper implements LifecycleEventListener {
     for (WhisperContext context : contexts.values()) {
       context.release();
     }
+    for (WhisperVadContext vadContext : vadContexts.values()) {
+      vadContext.release();
+    }
     WhisperContext.abortAllTranscribe(); // graceful abort
     contexts.clear();
+    vadContexts.clear();
     downloader.clearCache();
   }
 }
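These Java bridge methods surface in JS through src/NativeRNWhisper.ts (+48 lines, not shown in full in this diff). A rough sketch of the method shapes the Java signatures imply follows; the names and types here are inferred, and the published spec may differ.

```typescript
// Inferred from the Java bridge methods above; not the package's actual spec.
export type VadSegment = { t0: number; t1: number }

export interface RNWhisperVadNativeMethods {
  initVadContext(options: { filePath: string; isBundleAsset?: boolean }): Promise<{
    contextId: number // random positive int chosen by the Java side
    gpu: boolean // always false on Android ("Currently not supported")
    reasonNoGPU: string
  }>
  vadDetectSpeech(contextId: number, audioDataBase64: string, options: object): Promise<VadSegment[]>
  vadDetectSpeechFile(contextId: number, filePathOrBase64: string, options: object): Promise<VadSegment[]>
  releaseVadContext(contextId: number): Promise<void>
  releaseAllVadContexts(): Promise<void>
}
```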
package/android/src/main/java/com/rnwhisper/WhisperContext.java
CHANGED

@@ -460,19 +460,19 @@ public class WhisperContext {
     }
   }
 
-
+  public static boolean isArm64V8a() {
     return Build.SUPPORTED_ABIS[0].equals("arm64-v8a");
   }
 
-
+  public static boolean isArmeabiV7a() {
     return Build.SUPPORTED_ABIS[0].equals("armeabi-v7a");
   }
 
-
+  public static boolean isX86_64() {
     return Build.SUPPORTED_ABIS[0].equals("x86_64");
   }
 
-
+  public static String getCpuFeatures() {
     File file = new File("/proc/cpuinfo");
     StringBuilder stringBuilder = new StringBuilder();
     try {
@@ -492,6 +492,10 @@ public class WhisperContext {
     }
   }
 
+  public static String getLoadedLibrary() {
+    return loadedLibrary;
+  }
+
   // JNI methods
   protected static native long initContext(String modelPath);
   protected static native long initContextWithAsset(AssetManager assetManager, String modelPath);
@@ -529,4 +533,30 @@ public class WhisperContext {
     int n_samples
   );
   protected static native String bench(long context, int n_threads);
+
+  // VAD JNI methods
+  protected static native long initVadContext(String modelPath);
+  protected static native long initVadContextWithAsset(AssetManager assetManager, String modelPath);
+  protected static native long initVadContextWithInputStream(PushbackInputStream inputStream);
+  protected static native void freeVadContext(long vadContextPtr);
+  protected static native boolean vadDetectSpeech(long vadContextPtr, float[] audioData, int nSamples);
+  protected static native long vadGetSegmentsFromProbs(long vadContextPtr, float threshold,
+                                                       int minSpeechDurationMs, int minSilenceDurationMs,
+                                                       float maxSpeechDurationS, int speechPadMs,
+                                                       float samplesOverlap);
+  protected static native int vadGetNSegments(long segmentsPtr);
+  protected static native float vadGetSegmentT0(long segmentsPtr, int index);
+  protected static native float vadGetSegmentT1(long segmentsPtr, int index);
+  protected static native void vadFreeSegments(long segmentsPtr);
+
+  // Audio file loading utility for VAD
+  public static float[] loadAudioFileAsFloat32(String filePath) {
+    try {
+      java.io.FileInputStream fis = new java.io.FileInputStream(new java.io.File(filePath));
+      return AudioUtils.decodeWaveFile(fis);
+    } catch (Exception e) {
+      Log.e(NAME, "Failed to load audio file: " + filePath, e);
+      return null;
+    }
+  }
 }
package/android/src/main/java/com/rnwhisper/WhisperVadContext.java
ADDED

@@ -0,0 +1,101 @@
+package com.rnwhisper;
+
+import com.facebook.react.bridge.Arguments;
+import com.facebook.react.bridge.WritableArray;
+import com.facebook.react.bridge.WritableMap;
+import com.facebook.react.bridge.ReadableMap;
+import com.facebook.react.bridge.ReactApplicationContext;
+
+import android.util.Log;
+import android.content.res.AssetManager;
+import android.util.Base64;
+
+import java.io.PushbackInputStream;
+
+public class WhisperVadContext {
+  public static final String NAME = "RNWhisperVadContext";
+
+  private int id;
+  private ReactApplicationContext reactContext;
+  private long vadContext;
+
+  public WhisperVadContext(int id, ReactApplicationContext reactContext, long vadContext) {
+    this.id = id;
+    this.vadContext = vadContext;
+    this.reactContext = reactContext;
+  }
+
+  public WritableArray detectSpeechWithAudioData(float[] audioData, int numSamples, ReadableMap options) throws Exception {
+    if (vadContext == 0) {
+      throw new Exception("VAD context is null");
+    }
+
+    return processVadDetection(audioData, numSamples, options);
+  }
+
+  private int getResourceIdentifier(String filePath) {
+    int identifier = reactContext.getResources().getIdentifier(
+      filePath,
+      "drawable",
+      reactContext.getPackageName()
+    );
+    if (identifier == 0) {
+      identifier = reactContext.getResources().getIdentifier(
+        filePath,
+        "raw",
+        reactContext.getPackageName()
+      );
+    }
+    return identifier;
+  }
+
+  private WritableArray processVadDetection(float[] audioData, int numSamples, ReadableMap options) throws Exception {
+    // Run VAD detection using WhisperContext static methods
+    boolean speechDetected = WhisperContext.vadDetectSpeech(vadContext, audioData, numSamples);
+    if (!speechDetected) {
+      return Arguments.createArray();
+    }
+
+    // Set VAD parameters from options
+    float threshold = options.hasKey("threshold") ? (float) options.getDouble("threshold") : 0.5f;
+    int minSpeechDurationMs = options.hasKey("minSpeechDurationMs") ? options.getInt("minSpeechDurationMs") : 250;
+    int minSilenceDurationMs = options.hasKey("minSilenceDurationMs") ? options.getInt("minSilenceDurationMs") : 100;
+    float maxSpeechDurationS = options.hasKey("maxSpeechDurationS") ? (float) options.getDouble("maxSpeechDurationS") : 30.0f;
+    int speechPadMs = options.hasKey("speechPadMs") ? options.getInt("speechPadMs") : 30;
+    float samplesOverlap = options.hasKey("samplesOverlap") ? (float) options.getDouble("samplesOverlap") : 0.1f;
+
+    // Get segments from VAD using WhisperContext static methods
+    long segments = WhisperContext.vadGetSegmentsFromProbs(vadContext, threshold, minSpeechDurationMs,
+                                                           minSilenceDurationMs, maxSpeechDurationS,
+                                                           speechPadMs, samplesOverlap);
+    if (segments == 0) {
+      return Arguments.createArray();
+    }
+
+    // Convert segments to WritableArray using WhisperContext static methods
+    WritableArray result = Arguments.createArray();
+    int nSegments = WhisperContext.vadGetNSegments(segments);
+
+    for (int i = 0; i < nSegments; i++) {
+      float t0 = WhisperContext.vadGetSegmentT0(segments, i);
+      float t1 = WhisperContext.vadGetSegmentT1(segments, i);
+
+      WritableMap segment = Arguments.createMap();
+      segment.putDouble("t0", t0);
+      segment.putDouble("t1", t1);
+      result.pushMap(segment);
+    }
+
+    // Clean up using WhisperContext static methods
+    WhisperContext.vadFreeSegments(segments);
+
+    return result;
+  }
+
+  public void release() {
+    if (vadContext != 0) {
+      WhisperContext.freeVadContext(vadContext);
+      vadContext = 0;
+    }
+  }
+}
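`processVadDetection` reads each option from the `ReadableMap` with a fallback, so the defaults live on the Java side. Summarized as an illustrative TypeScript type (the option names and defaults come straight from the code above; the interface itself is not the package's published type):

```typescript
// Defaults applied by WhisperVadContext.processVadDetection when a key is absent.
interface VadOptions {
  threshold?: number            // default 0.5 — speech probability cutoff
  minSpeechDurationMs?: number  // default 250
  minSilenceDurationMs?: number // default 100
  maxSpeechDurationS?: number   // default 30.0
  speechPadMs?: number          // default 30
  samplesOverlap?: number       // default 0.1
}
```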
package/android/src/main/jni.cpp
CHANGED
@@ -148,6 +148,47 @@ static struct whisper_context *whisper_init_from_asset(
     return whisper_init_with_params(&loader, cparams);
 }
 
+// VAD context initialization functions
+static struct whisper_vad_context *whisper_vad_init_from_input_stream(
+    JNIEnv *env,
+    jobject input_stream, // PushbackInputStream
+    struct whisper_vad_context_params vad_params
+) {
+    input_stream_context *context = new input_stream_context;
+    context->env = env;
+    context->input_stream = env->NewGlobalRef(input_stream);
+
+    whisper_model_loader loader = {
+        .context = context,
+        .read = &input_stream_read,
+        .eof = &input_stream_is_eof,
+        .close = &input_stream_close
+    };
+    return whisper_vad_init_with_params(&loader, vad_params);
+}
+
+static struct whisper_vad_context *whisper_vad_init_from_asset(
+    JNIEnv *env,
+    jobject assetManager,
+    const char *asset_path,
+    struct whisper_vad_context_params vad_params
+) {
+    LOGI("Loading VAD model from asset '%s'\n", asset_path);
+    AAssetManager *asset_manager = AAssetManager_fromJava(env, assetManager);
+    AAsset *asset = AAssetManager_open(asset_manager, asset_path, AASSET_MODE_STREAMING);
+    if (!asset) {
+        LOGW("Failed to open VAD asset '%s'\n", asset_path);
+        return NULL;
+    }
+    whisper_model_loader loader = {
+        .context = asset,
+        .read = &asset_read,
+        .eof = &asset_is_eof,
+        .close = &asset_close
+    };
+    return whisper_vad_init_with_params(&loader, vad_params);
+}
+
 extern "C" {
 
 JNIEXPORT jlong JNICALL
@@ -530,4 +571,159 @@ Java_com_rnwhisper_WhisperContext_bench(
     return env->NewStringUTF(result.c_str());
 }
 
+// VAD Context JNI implementations
+JNIEXPORT jlong JNICALL
+Java_com_rnwhisper_WhisperContext_initVadContext(
+    JNIEnv *env,
+    jobject thiz,
+    jstring model_path_str
+) {
+    UNUSED(thiz);
+    struct whisper_vad_context_params vad_params = whisper_vad_default_context_params();
+
+    struct whisper_vad_context *vad_context = nullptr;
+    const char *model_path_chars = env->GetStringUTFChars(model_path_str, nullptr);
+    vad_context = whisper_vad_init_from_file_with_params(model_path_chars, vad_params);
+    env->ReleaseStringUTFChars(model_path_str, model_path_chars);
+    return reinterpret_cast<jlong>(vad_context);
+}
+
+JNIEXPORT jlong JNICALL
+Java_com_rnwhisper_WhisperContext_initVadContextWithAsset(
+    JNIEnv *env,
+    jobject thiz,
+    jobject asset_manager,
+    jstring model_path_str
+) {
+    UNUSED(thiz);
+    struct whisper_vad_context_params vad_params = whisper_vad_default_context_params();
+
+    struct whisper_vad_context *vad_context = nullptr;
+    const char *model_path_chars = env->GetStringUTFChars(model_path_str, nullptr);
+    vad_context = whisper_vad_init_from_asset(env, asset_manager, model_path_chars, vad_params);
+    env->ReleaseStringUTFChars(model_path_str, model_path_chars);
+    return reinterpret_cast<jlong>(vad_context);
+}
+
+JNIEXPORT jlong JNICALL
+Java_com_rnwhisper_WhisperContext_initVadContextWithInputStream(
+    JNIEnv *env,
+    jobject thiz,
+    jobject input_stream
+) {
+    UNUSED(thiz);
+    struct whisper_vad_context_params vad_params = whisper_vad_default_context_params();
+
+    struct whisper_vad_context *vad_context = nullptr;
+    vad_context = whisper_vad_init_from_input_stream(env, input_stream, vad_params);
+    return reinterpret_cast<jlong>(vad_context);
+}
+
+JNIEXPORT void JNICALL
+Java_com_rnwhisper_WhisperContext_freeVadContext(
+    JNIEnv *env,
+    jobject thiz,
+    jlong vad_context_ptr
+) {
+    UNUSED(env);
+    UNUSED(thiz);
+    struct whisper_vad_context *vad_context = reinterpret_cast<struct whisper_vad_context *>(vad_context_ptr);
+    whisper_vad_free(vad_context);
+}
+
+JNIEXPORT jboolean JNICALL
+Java_com_rnwhisper_WhisperContext_vadDetectSpeech(
+    JNIEnv *env,
+    jobject thiz,
+    jlong vad_context_ptr,
+    jfloatArray audio_data,
+    jint n_samples
+) {
+    UNUSED(thiz);
+    struct whisper_vad_context *vad_context = reinterpret_cast<struct whisper_vad_context *>(vad_context_ptr);
+
+    jfloat *audio_data_arr = env->GetFloatArrayElements(audio_data, nullptr);
+    bool result = whisper_vad_detect_speech(vad_context, audio_data_arr, n_samples);
+    env->ReleaseFloatArrayElements(audio_data, audio_data_arr, JNI_ABORT);
+
+    return result;
+}
+
+JNIEXPORT jlong JNICALL
+Java_com_rnwhisper_WhisperContext_vadGetSegmentsFromProbs(
+    JNIEnv *env,
+    jobject thiz,
+    jlong vad_context_ptr,
+    jfloat threshold,
+    jint min_speech_duration_ms,
+    jint min_silence_duration_ms,
+    jfloat max_speech_duration_s,
+    jint speech_pad_ms,
+    jfloat samples_overlap
+) {
+    UNUSED(thiz);
+    struct whisper_vad_context *vad_context = reinterpret_cast<struct whisper_vad_context *>(vad_context_ptr);
+
+    struct whisper_vad_params vad_params = whisper_vad_default_params();
+    vad_params.threshold = threshold;
+    vad_params.min_speech_duration_ms = min_speech_duration_ms;
+    vad_params.min_silence_duration_ms = min_silence_duration_ms;
+    vad_params.max_speech_duration_s = max_speech_duration_s;
+    vad_params.speech_pad_ms = speech_pad_ms;
+    vad_params.samples_overlap = samples_overlap;
+
+    struct whisper_vad_segments *segments = whisper_vad_segments_from_probs(vad_context, vad_params);
+    return reinterpret_cast<jlong>(segments);
+}
+
+JNIEXPORT jint JNICALL
+Java_com_rnwhisper_WhisperContext_vadGetNSegments(
+    JNIEnv *env,
+    jobject thiz,
+    jlong segments_ptr
+) {
+    UNUSED(env);
+    UNUSED(thiz);
+    struct whisper_vad_segments *segments = reinterpret_cast<struct whisper_vad_segments *>(segments_ptr);
+    return whisper_vad_segments_n_segments(segments);
+}
+
+JNIEXPORT jfloat JNICALL
+Java_com_rnwhisper_WhisperContext_vadGetSegmentT0(
+    JNIEnv *env,
+    jobject thiz,
+    jlong segments_ptr,
+    jint index
+) {
+    UNUSED(env);
+    UNUSED(thiz);
+    struct whisper_vad_segments *segments = reinterpret_cast<struct whisper_vad_segments *>(segments_ptr);
+    return whisper_vad_segments_get_segment_t0(segments, index);
+}
+
+JNIEXPORT jfloat JNICALL
+Java_com_rnwhisper_WhisperContext_vadGetSegmentT1(
+    JNIEnv *env,
+    jobject thiz,
+    jlong segments_ptr,
+    jint index
+) {
+    UNUSED(env);
+    UNUSED(thiz);
+    struct whisper_vad_segments *segments = reinterpret_cast<struct whisper_vad_segments *>(segments_ptr);
+    return whisper_vad_segments_get_segment_t1(segments, index);
+}
+
+JNIEXPORT void JNICALL
+Java_com_rnwhisper_WhisperContext_vadFreeSegments(
+    JNIEnv *env,
+    jobject thiz,
+    jlong segments_ptr
+) {
+    UNUSED(env);
+    UNUSED(thiz);
+    struct whisper_vad_segments *segments = reinterpret_cast<struct whisper_vad_segments *>(segments_ptr);
+    whisper_vad_free_segments(segments);
+}
+
 } // extern "C"
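Note the two-phase flow in these bindings: `vadDetectSpeech` runs the Silero model over the samples, then `vadGetSegmentsFromProbs` turns the stored probabilities into segments using the supplied thresholds. The JS API performs both phases on every call, so comparing thresholds from JS means repeated full runs; a small sketch (the types here are illustrative):

```typescript
// Illustrative structural type for a VAD context returned by initWhisperVad.
type Vad = {
  detectSpeech(file: string, opts: { threshold?: number }): Promise<Array<{ t0: number; t1: number }>>
}

// Each call re-runs the model pass and the segmentation phase natively.
async function sweepThresholds(vad: Vad, file: string) {
  for (const threshold of [0.3, 0.5, 0.7]) {
    const segments = await vad.detectSpeech(file, { threshold })
    console.log(`threshold ${threshold}: ${segments.length} segment(s)`)
  }
}
```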
Binary file
Binary file
Binary file
Binary file