npm - whisper.rn - Versions diffs - 0.3.6 → 0.3.8 - Mend

whisper.rn 0.3.6 → 0.3.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (37) hide show

package/README.md +28 -0
package/android/src/main/java/com/rnwhisper/AudioUtils.java +119 -0
package/android/src/main/java/com/rnwhisper/WhisperContext.java +74 -39
package/android/src/main/jni.cpp +45 -12
package/android/src/newarch/java/com/rnwhisper/RNWhisperModule.java +26 -0
package/cpp/rn-whisper.cpp +51 -0
package/cpp/rn-whisper.h +2 -1
package/ios/RNWhisper.mm +81 -22
package/ios/RNWhisper.xcodeproj/project.pbxproj +27 -3
package/ios/RNWhisper.xcodeproj/project.xcworkspace/xcuserdata/jhen.xcuserdatad/UserInterfaceState.xcuserstate +0 -0
package/ios/RNWhisper.xcodeproj/xcuserdata/jhen.xcuserdatad/xcschemes/xcschememanagement.plist +5 -0
package/ios/RNWhisperAudioSessionUtils.h +13 -0
package/ios/RNWhisperAudioSessionUtils.m +85 -0
package/ios/RNWhisperAudioUtils.h +9 -0
package/ios/RNWhisperAudioUtils.m +83 -0
package/ios/RNWhisperContext.h +1 -0
package/ios/RNWhisperContext.mm +101 -28
package/lib/commonjs/AudioSessionIos.js +91 -0
package/lib/commonjs/AudioSessionIos.js.map +1 -0
package/lib/commonjs/NativeRNWhisper.js.map +1 -1
package/lib/commonjs/index.js +82 -14
package/lib/commonjs/index.js.map +1 -1
package/lib/module/AudioSessionIos.js +83 -0
package/lib/module/AudioSessionIos.js.map +1 -0
package/lib/module/NativeRNWhisper.js.map +1 -1
package/lib/module/index.js +77 -14
package/lib/module/index.js.map +1 -1
package/lib/typescript/AudioSessionIos.d.ts +54 -0
package/lib/typescript/AudioSessionIos.d.ts.map +1 -0
package/lib/typescript/NativeRNWhisper.d.ts +8 -0
package/lib/typescript/NativeRNWhisper.d.ts.map +1 -1
package/lib/typescript/index.d.ts +62 -4
package/lib/typescript/index.d.ts.map +1 -1
package/package.json +1 -1
package/src/AudioSessionIos.ts +90 -0
package/src/NativeRNWhisper.ts +11 -1
package/src/index.ts +178 -28

package/README.md CHANGED Viewed

@@ -99,6 +99,34 @@ subscribe(evt => {
 })
 ```
+On iOS, you may need to change the Audio Session so that recording can be used alongside other audio playback, or to optimize the quality of the recording. AudioSession utilities are provided for this:
+Option 1 - Use options in transcribeRealtime:
+```js
+import { AudioSessionIos } from 'whisper.rn'
+const { stop, subscribe } = await whisperContext.transcribeRealtime({
+  audioSessionOnStartIos: {
+    category: AudioSessionIos.Category.PlayAndRecord,
+    options: [AudioSessionIos.CategoryOption.MixWithOthers],
+    mode: AudioSessionIos.Mode.Default,
+  },
+  audioSessionOnStopIos: 'restore', // Or an AudioSessionSettingIos
+})
+```
+Option 2 - Manage the Audio Session anywhere:
+```js
+import { AudioSessionIos } from 'whisper.rn'
+await AudioSessionIos.setCategory(
+  AudioSessionIos.Category.PlayAndRecord, [AudioSessionIos.CategoryOption.MixWithOthers],
+)
+await AudioSessionIos.setMode(AudioSessionIos.Mode.Default)
+await AudioSessionIos.setActive(true)
+// Then you can start recording
+```
 In Android, you may need to request the microphone permission by [`PermissionAndroid`](https://reactnative.dev/docs/permissionsandroid).
 Please visit the [Documentation](docs/) for more details.

package/android/src/main/java/com/rnwhisper/AudioUtils.java ADDED Viewed

@@ -0,0 +1,119 @@
+package com.rnwhisper;
+import android.util.Log;
+import java.util.ArrayList;
+import java.lang.StringBuilder;
+import java.io.IOException;
+import java.io.FileReader;
+import java.io.ByteArrayOutputStream;
+import java.io.File;
+import java.io.FileOutputStream;
+import java.io.DataOutputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.nio.ByteBuffer;
+import java.nio.ByteOrder;
+import java.nio.ShortBuffer;
+/**
+ * Static helpers that convert 16-bit PCM audio between in-memory sample buffers
+ * and WAV data (mono, 16 kHz, 16 bits per sample).
+ */
+public class AudioUtils {
+  private static final String NAME = "RNWhisperAudioUtils";
+  private static final int SAMPLE_RATE = 16000;
+  // Size of the WAV header written by saveWavFile and skipped by decodeWaveFile.
+  private static final int WAV_HEADER_SIZE = 44;
+  // Bytes used by one 16-bit sample.
+  private static final int BYTES_PER_SAMPLE = 2;
+  /**
+   * Serializes every sample as two bytes, high byte first.
+   *
+   * @param shortInts samples to serialize
+   * @return a new array of {@code shortInts.length * 2} bytes
+   */
+  private static byte[] shortToByte(short[] shortInts) {
+    byte[] byteData = new byte[shortInts.length * BYTES_PER_SAMPLE];
+    int j = 0;
+    for (short sample : shortInts) {
+      byteData[j++] = (byte) (sample >>> 8);
+      byteData[j++] = (byte) sample;
+    }
+    return byteData;
+  }
+  /**
+   * Joins all sample buffers, in list order, into one byte array (high byte first per sample).
+   *
+   * @param buffers sample buffers to join
+   * @return the concatenated bytes; empty when {@code buffers} is empty
+   */
+  public static byte[] concatShortBuffers(ArrayList<short[]> buffers) {
+    int totalLength = 0;
+    for (short[] samples : buffers) {
+      totalLength += samples.length;
+    }
+    byte[] result = new byte[totalLength * BYTES_PER_SAMPLE];
+    int offset = 0;
+    for (short[] samples : buffers) {
+      byte[] bytes = shortToByte(samples);
+      System.arraycopy(bytes, 0, result, offset, bytes.length);
+      offset += bytes.length;
+    }
+    return result;
+  }
+  /**
+   * Drops the run of zero bytes at the end of the audio data without cutting a sample in half.
+   *
+   * @param audioData bytes to trim
+   * @return a copy of {@code audioData} without its trailing zero samples
+   */
+  private static byte[] removeTrailingZeros(byte[] audioData) {
+    int end = audioData.length;
+    while (end > 0 && audioData[end - 1] == 0) {
+      --end;
+    }
+    // A sample whose second byte is zero must keep that byte; otherwise the
+    // sample would be split and the byte count would no longer be even.
+    if (end % BYTES_PER_SAMPLE != 0 && end < audioData.length) {
+      ++end;
+    }
+    byte[] newData = new byte[end];
+    System.arraycopy(audioData, 0, newData, 0, end);
+    return newData;
+  }
+  /**
+   * Writes the samples to a WAV file (PCM, mono, 16 kHz, 16 bits) after trimming trailing silence.
+   *
+   * @param rawData sample bytes with the high byte first, as produced by concatShortBuffers
+   * @param audioOutputFile destination path; when null nothing is written
+   * @throws IOException if the file cannot be created or written
+   */
+  public static void saveWavFile(byte[] rawData, String audioOutputFile) throws IOException {
+    Log.d(NAME, "call saveWavFile");
+    if (audioOutputFile == null) {
+      // No output path was requested, so there is no file to produce.
+      Log.d(NAME, "skip saving wav file: no output path");
+      return;
+    }
+    rawData = removeTrailingZeros(rawData);
+    // Only whole samples are written, so every size in the header is derived
+    // from this even byte count rather than from rawData.length.
+    int dataLength = (rawData.length / BYTES_PER_SAMPLE) * BYTES_PER_SAMPLE;
+    DataOutputStream output = null;
+    try {
+      output = new DataOutputStream(new FileOutputStream(audioOutputFile));
+      // WAVE header
+      // see http://ccrma.stanford.edu/courses/422/projects/WaveFormat/
+      // DataOutputStream writes big endian, hence the reverseBytes calls.
+      output.writeBytes("RIFF"); // chunk id
+      output.writeInt(Integer.reverseBytes(36 + dataLength)); // chunk size
+      output.writeBytes("WAVE"); // format
+      output.writeBytes("fmt "); // subchunk 1 id
+      output.writeInt(Integer.reverseBytes(16)); // subchunk 1 size
+      output.writeShort(Short.reverseBytes((short) 1)); // audio format (1 = PCM)
+      output.writeShort(Short.reverseBytes((short) 1)); // number of channels
+      output.writeInt(Integer.reverseBytes(SAMPLE_RATE)); // sample rate
+      output.writeInt(Integer.reverseBytes(SAMPLE_RATE * BYTES_PER_SAMPLE)); // byte rate
+      output.writeShort(Short.reverseBytes((short) BYTES_PER_SAMPLE)); // block align
+      output.writeShort(Short.reverseBytes((short) 16)); // bits per sample
+      output.writeBytes("data"); // subchunk 2 id
+      output.writeInt(Integer.reverseBytes(dataLength)); // subchunk 2 size
+      // Audio data (conversion big endian -> little endian): swap each byte pair.
+      byte[] littleEndian = new byte[dataLength];
+      for (int i = 0; i < dataLength; i += BYTES_PER_SAMPLE) {
+        littleEndian[i] = rawData[i + 1];
+        littleEndian[i + 1] = rawData[i];
+      }
+      Log.d(NAME, "writing audio file: " + audioOutputFile);
+      output.write(littleEndian);
+    } finally {
+      if (output != null) {
+        output.close();
+      }
+    }
+  }
+  /**
+   * Reads a whole WAV stream and converts its samples to floats in [-1, 1].
+   *
+   * The first 44 bytes are skipped and the rest is read as little-endian 16-bit samples.
+   * NOTE(review): this assumes a plain 44-byte header with no extra chunks - confirm for
+   * files that were not produced by saveWavFile.
+   *
+   * @param inputStream stream positioned at the start of the WAV data; it is not closed here
+   * @return one float per 16-bit sample
+   * @throws IOException if reading fails or the data is shorter than the header
+   */
+  public static float[] decodeWaveFile(InputStream inputStream) throws IOException {
+    ByteArrayOutputStream baos = new ByteArrayOutputStream();
+    byte[] buffer = new byte[1024];
+    int bytesRead;
+    while ((bytesRead = inputStream.read(buffer)) != -1) {
+      baos.write(buffer, 0, bytesRead);
+    }
+    byte[] wavBytes = baos.toByteArray();
+    if (wavBytes.length < WAV_HEADER_SIZE) {
+      throw new IOException(
+        "Invalid WAV data: expected at least " + WAV_HEADER_SIZE + " bytes but got " + wavBytes.length
+      );
+    }
+    ByteBuffer byteBuffer = ByteBuffer.wrap(wavBytes);
+    byteBuffer.order(ByteOrder.LITTLE_ENDIAN);
+    byteBuffer.position(WAV_HEADER_SIZE);
+    ShortBuffer shortBuffer = byteBuffer.asShortBuffer();
+    float[] floatArray = new float[shortBuffer.remaining()];
+    for (int i = 0; i < floatArray.length; i++) {
+      float sample = ((float) shortBuffer.get(i)) / 32767.0f;
+      // -32768 / 32767 falls just below -1, so clamp into [-1, 1].
+      floatArray[i] = Math.min(Math.max(sample, -1f), 1f);
+    }
+    return floatArray;
+  }
+}

package/android/src/main/java/com/rnwhisper/WhisperContext.java CHANGED Viewed

@@ -14,22 +14,15 @@ import android.media.AudioFormat;
 import android.media.AudioRecord;
 import android.media.MediaRecorder.AudioSource;
-import java.util.Random;
 import java.util.ArrayList;
 import java.lang.StringBuilder;
-import java.io.File;
 import java.io.BufferedReader;
 import java.io.IOException;
 import java.io.FileReader;
-import java.io.ByteArrayOutputStream;
 import java.io.File;
-import java.io.FileInputStream;
 import java.io.IOException;
 import java.io.InputStream;
 import java.io.PushbackInputStream;
-import java.nio.ByteBuffer;
-import java.nio.ByteOrder;
-import java.nio.ShortBuffer;
 public class WhisperContext {
   public static final String NAME = "RNWhisperContext";
@@ -86,6 +79,27 @@ public class WhisperContext {
     fullHandler = null;
   }
+  /**
+   * Decides whether the newest audio in the current slice should be treated as speech.
+   *
+   * The check only runs while no transcription is in progress and the "useVad" option is
+   * true; otherwise the audio is always reported as speech.
+   *
+   * @param options transcription options; reads "useVad", "vadMs", "vadThold" and "vadFreqThold"
+   * @param shortBuffer PCM samples of the current slice
+   * @param nSamples number of samples that were in shortBuffer before the latest read
+   * @param n number of samples added by the latest read
+   * @return the vadSimple result for the last "vadMs" of audio, false while less audio
+   *         than that is buffered, or true when the check is skipped
+   */
+  private boolean vad(ReadableMap options, short[] shortBuffer, int nSamples, int n) {
+    if (isTranscribing || !options.hasKey("useVad") || !options.getBoolean("useVad")) {
+      return true;
+    }
+    int vadMs = options.hasKey("vadMs") ? options.getInt("vadMs") : 2000;
+    // The native check compares the final 1000 ms with the whole window and
+    // answers "no speech" unless the window is longer than that, so a shorter
+    // window could never detect speech. Fall back to the default length.
+    if (vadMs < 2000) {
+      vadMs = 2000;
+    }
+    // Keep millisecond precision (a whole-second division would turn 2500 ms
+    // into 2 s); long arithmetic avoids overflow for large vadMs values.
+    long sampleSize = (long) vadMs * SAMPLE_RATE / 1000;
+    int available = nSamples + n;
+    if (available <= sampleSize) {
+      return false;
+    }
+    int windowSize = (int) sampleSize;
+    int start = available - windowSize;
+    float[] audioData = new float[windowSize];
+    for (int i = 0; i < windowSize; i++) {
+      audioData[i] = shortBuffer[i + start] / 32768.0f;
+    }
+    float vadThold = options.hasKey("vadThold") ? (float) options.getDouble("vadThold") : 0.6f;
+    float vadFreqThold = options.hasKey("vadFreqThold") ? (float) options.getDouble("vadFreqThold") : 0.6f;
+    return vadSimple(audioData, windowSize, vadThold, vadFreqThold);
+  }
   public int startRealtimeTranscribe(int jobId, ReadableMap options) {
     if (isCapturing || isTranscribing) {
       return -100;
@@ -111,6 +125,8 @@ public class WhisperContext {
     isUseSlices = audioSliceSec < audioSec;
+    String audioOutputPath = options.hasKey("audioOutputPath") ? options.getString("audioOutputPath") : null;
     shortBufferSlices = new ArrayList<short[]>();
     shortBufferSlices.add(new short[audioSliceSec * SAMPLE_RATE]);
     sliceNSamples = new ArrayList<Integer>();
@@ -145,6 +161,12 @@ public class WhisperContext {
                 ) {
                   emitTranscribeEvent("@RNWhisper_onRealtimeTranscribeEnd", Arguments.createMap());
                 } else if (!isTranscribing) {
+                  short[] shortBuffer = shortBufferSlices.get(sliceIndex);
+                  boolean isSpeech = vad(options, shortBuffer, nSamples, 0);
+                  if (!isSpeech) {
+                    emitTranscribeEvent("@RNWhisper_onRealtimeTranscribeEnd", Arguments.createMap());
+                    break;
+                  }
                   isTranscribing = true;
                   fullTranscribeSamples(options, true);
                 }
@@ -166,9 +188,14 @@ public class WhisperContext {
               for (int i = 0; i < n; i++) {
                 shortBuffer[nSamples + i] = buffer[i];
               }
+              boolean isSpeech = vad(options, shortBuffer, nSamples, n);
               nSamples += n;
               sliceNSamples.set(sliceIndex, nSamples);
+              if (!isSpeech) continue;
               if (!isTranscribing && nSamples > SAMPLE_RATE / 2) {
                 isTranscribing = true;
                 fullHandler = new Thread(new Runnable() {
@@ -183,6 +210,9 @@ public class WhisperContext {
               Log.e(NAME, "Error transcribing realtime: " + e.getMessage());
             }
           }
+          // TODO: Append in real time so we don't need to keep all slices & also reduce memory usage
+          Log.d(NAME, "Begin saving wav file to " + audioOutputPath);
+          AudioUtils.saveWavFile(AudioUtils.concatShortBuffers(shortBufferSlices), audioOutputPath);
           if (!isTranscribing) {
             emitTranscribeEvent("@RNWhisper_onRealtimeTranscribeEnd", Arguments.createMap());
           }
@@ -233,7 +263,7 @@ public class WhisperContext {
     payload.putInt("sliceIndex", transcribeSliceIndex);
     if (code == 0) {
-      payload.putMap("data", getTextSegments());
+      payload.putMap("data", getTextSegments(0, getTextSegmentCount(context)));
     } else {
       payload.putString("error", "Transcribe failed with code " + code);
     }
@@ -293,16 +323,41 @@ public class WhisperContext {
     eventEmitter.emit("@RNWhisper_onTranscribeProgress", event);
   }
-  private static class ProgressCallback {
+  private void emitNewSegments(WritableMap result) {
+    WritableMap event = Arguments.createMap();
+    event.putInt("contextId", WhisperContext.this.id);
+    event.putInt("jobId", jobId);
+    event.putMap("result", result);
+    eventEmitter.emit("@RNWhisper_onTranscribeNewSegments", event);
+  }
+  private static class Callback { // Passed to the native fullTranscribe call; its methods forward progress and new-segment notifications to the owning context.
     WhisperContext context;
+    boolean emitProgressNeeded = false; // When false, onProgress drops the update.
+    boolean emitNewSegmentsNeeded = false; // When false, onNewSegments only updates the running total.
+    int totalNNew = 0; // Sum of every nNew value received so far.
-    public ProgressCallback(WhisperContext context) {
+    public Callback(WhisperContext context, boolean emitProgressNeeded, boolean emitNewSegmentsNeeded) { // The two flags select which notifications are turned into events.
       this.context = context;
+      this.emitProgressNeeded = emitProgressNeeded;
+      this.emitNewSegmentsNeeded = emitNewSegmentsNeeded;
     }
     void onProgress(int progress) {
+      if (!emitProgressNeeded) return; // Progress events were not requested.
       context.emitProgress(progress);
     }
+    void onNewSegments(int nNew) { // nNew is the number of segments reported by this notification.
+      Log.d(NAME, "onNewSegments: " + nNew);
+      totalNNew += nNew; // Counted even when no event is emitted, so later ranges stay correct.
+      if (!emitNewSegmentsNeeded) return;
+      WritableMap result = context.getTextSegments(totalNNew - nNew, totalNNew); // NOTE(review): the second parameter is declared as "count" but receives the running total - confirm it is used as an end index.
+      result.putInt("nNew", nNew);
+      result.putInt("totalNNew", totalNNew);
+      context.emitNewSegments(result);
+    }
   }
   public WritableMap transcribeInputStream(int jobId, InputStream inputStream, ReadableMap options) throws IOException, Exception {
@@ -313,19 +368,21 @@ public class WhisperContext {
     this.jobId = jobId;
     isTranscribing = true;
-    float[] audioData = decodeWaveFile(inputStream);
+    float[] audioData = AudioUtils.decodeWaveFile(inputStream);
     int code = full(jobId, options, audioData, audioData.length);
     isTranscribing = false;
     this.jobId = -1;
     if (code != 0) {
       throw new Exception("Failed to transcribe the file. Code: " + code);
     }
-    WritableMap result = getTextSegments();
+    WritableMap result = getTextSegments(0, getTextSegmentCount(context));
     result.putBoolean("isAborted", isStoppedByAction);
     return result;
   }
   private int full(int jobId, ReadableMap options, float[] audioData, int audioDataLen) {
+    boolean hasProgressCallback = options.hasKey("onProgress") && options.getBoolean("onProgress");
+    boolean hasNewSegmentsCallback = options.hasKey("onNewSegments") && options.getBoolean("onNewSegments");
     return fullTranscribe(
       jobId,
       context,
@@ -365,13 +422,12 @@ public class WhisperContext {
       options.hasKey("language") ? options.getString("language") : "auto",
       // jstring prompt
       options.hasKey("prompt") ? options.getString("prompt") : null,
-      // ProgressCallback progressCallback
-      options.hasKey("onProgress") && options.getBoolean("onProgress") ? new ProgressCallback(this) : null
+      // Callback callback
+      hasProgressCallback || hasNewSegmentsCallback ? new Callback(this, hasProgressCallback, hasNewSegmentsCallback) : null
     );
   }
-  private WritableMap getTextSegments() {
-    Integer count = getTextSegmentCount(context);
+  private WritableMap getTextSegments(int start, int count) {
     StringBuilder builder = new StringBuilder();
     WritableMap data = Arguments.createMap();
@@ -424,28 +480,6 @@ public class WhisperContext {
     freeContext(context);
   }
-  public static float[] decodeWaveFile(InputStream inputStream) throws IOException {
-    ByteArrayOutputStream baos = new ByteArrayOutputStream();
-    byte[] buffer = new byte[1024];
-    int bytesRead;
-    while ((bytesRead = inputStream.read(buffer)) != -1) {
-      baos.write(buffer, 0, bytesRead);
-    }
-    ByteBuffer byteBuffer = ByteBuffer.wrap(baos.toByteArray());
-    byteBuffer.order(ByteOrder.LITTLE_ENDIAN);
-    byteBuffer.position(44);
-    ShortBuffer shortBuffer = byteBuffer.asShortBuffer();
-    short[] shortArray = new short[shortBuffer.limit()];
-    shortBuffer.get(shortArray);
-    float[] floatArray = new float[shortArray.length];
-    for (int i = 0; i < shortArray.length; i++) {
-      floatArray[i] = ((float) shortArray[i]) / 32767.0f;
-      floatArray[i] = Math.max(floatArray[i], -1f);
-      floatArray[i] = Math.min(floatArray[i], 1f);
-    }
-    return floatArray;
-  }
   static {
     Log.d(NAME, "Primary ABI: " + Build.SUPPORTED_ABIS[0]);
     boolean loadVfpv4 = false;
@@ -513,6 +547,7 @@ public class WhisperContext {
   protected static native long initContext(String modelPath);
   protected static native long initContextWithAsset(AssetManager assetManager, String modelPath);
   protected static native long initContextWithInputStream(PushbackInputStream inputStream);
+  protected static native boolean vadSimple(float[] audio_data, int audio_data_len, float vad_thold, float vad_freq_thold);
   protected static native int fullTranscribe(
     int job_id,
     long context,
@@ -533,7 +568,7 @@ public class WhisperContext {
     boolean translate,
     String language,
     String prompt,
-    ProgressCallback progressCallback
+    Callback Callback
   );
   protected static native void abortTranscribe(int jobId);
   protected static native void abortAllTranscribe();

package/android/src/main/jni.cpp CHANGED Viewed

@@ -6,6 +6,7 @@
 #include <sys/sysinfo.h>
 #include <string>
 #include <thread>
+#include <vector>
 #include "whisper.h"
 #include "rn-whisper.h"
 #include "ggml.h"
@@ -184,9 +185,30 @@ Java_com_rnwhisper_WhisperContext_initContextWithInputStream(
     return reinterpret_cast<jlong>(context);
 }
-struct progress_callback_context {
+// JNI entry point for WhisperContext.vadSimple.
+// Copies up to audio_data_len floats out of the Java array and runs
+// rn_whisper_vad_simple on the copy with a fixed 1000 ms tail window.
+// Returns JNI_FALSE when there is no audio to inspect.
+JNIEXPORT jboolean JNICALL
+Java_com_rnwhisper_WhisperContext_vadSimple(
+    JNIEnv *env,
+    jobject thiz,
+    jfloatArray audio_data,
+    jint audio_data_len,
+    jfloat vad_thold,
+    jfloat vad_freq_thold
+) {
+    UNUSED(thiz);
+    if (audio_data == nullptr || audio_data_len <= 0) {
+        return JNI_FALSE;
+    }
+    // Never read past the real end of the Java array, whatever length the caller passed.
+    const jsize available = env->GetArrayLength(audio_data);
+    const jsize n_samples = audio_data_len < available ? audio_data_len : available;
+    if (n_samples <= 0) {
+        return JNI_FALSE;
+    }
+    // Copy straight into the vector: the filter modifies its input in place,
+    // and a region copy needs no matching release call.
+    std::vector<float> samples(n_samples);
+    env->GetFloatArrayRegion(audio_data, 0, n_samples, samples.data());
+    bool is_speech = rn_whisper_vad_simple(samples, WHISPER_SAMPLE_RATE, 1000, vad_thold, vad_freq_thold, false);
+    return is_speech ? JNI_TRUE : JNI_FALSE;
+}
+struct callback_context {
     JNIEnv *env;
-    jobject progress_callback_instance;
+    jobject callback_instance;
 };
 JNIEXPORT jint JNICALL
@@ -212,7 +234,7 @@ Java_com_rnwhisper_WhisperContext_fullTranscribe(
     jboolean translate,
     jstring language,
     jstring prompt,
-    jobject progress_callback_instance
+    jobject callback_instance
 ) {
     UNUSED(thiz);
     struct whisper_context *context = reinterpret_cast<struct whisper_context *>(context_ptr);
@@ -280,19 +302,30 @@ Java_com_rnwhisper_WhisperContext_fullTranscribe(
     };
     params.encoder_begin_callback_user_data = rn_whisper_assign_abort_map(job_id);
-    if (progress_callback_instance != nullptr) {
+    if (callback_instance != nullptr) {
+        callback_context *cb_ctx = new callback_context;
+        cb_ctx->env = env;
+        cb_ctx->callback_instance = env->NewGlobalRef(callback_instance);
         params.progress_callback = [](struct whisper_context * /*ctx*/, struct whisper_state * /*state*/, int progress, void * user_data) {
-            progress_callback_context *cb_ctx = (progress_callback_context *)user_data;
+            callback_context *cb_ctx = (callback_context *)user_data;
             JNIEnv *env = cb_ctx->env;
-            jobject progress_callback_instance = cb_ctx->progress_callback_instance;
-            jclass progress_callback_class = env->GetObjectClass(progress_callback_instance);
-            jmethodID onProgress = env->GetMethodID(progress_callback_class, "onProgress", "(I)V");
-            env->CallVoidMethod(progress_callback_instance, onProgress, progress);
+            jobject callback_instance = cb_ctx->callback_instance;
+            jclass callback_class = env->GetObjectClass(callback_instance);
+            jmethodID onProgress = env->GetMethodID(callback_class, "onProgress", "(I)V");
+            env->CallVoidMethod(callback_instance, onProgress, progress);
         };
-        progress_callback_context *cb_ctx = new progress_callback_context;
-        cb_ctx->env = env;
-        cb_ctx->progress_callback_instance = env->NewGlobalRef(progress_callback_instance);
         params.progress_callback_user_data = cb_ctx;
+        params.new_segment_callback = [](struct whisper_context * /*ctx*/, struct whisper_state * /*state*/, int n_new, void * user_data) {
+            callback_context *cb_ctx = (callback_context *)user_data;
+            JNIEnv *env = cb_ctx->env;
+            jobject callback_instance = cb_ctx->callback_instance;
+            jclass callback_class = env->GetObjectClass(callback_instance);
+            jmethodID onNewSegments = env->GetMethodID(callback_class, "onNewSegments", "(I)V");
+            env->CallVoidMethod(callback_instance, onNewSegments, n_new);
+        };
+        params.new_segment_callback_user_data = cb_ctx;
     }
     LOGI("About to reset timings");

package/android/src/newarch/java/com/rnwhisper/RNWhisperModule.java CHANGED Viewed

@@ -6,6 +6,7 @@ import com.facebook.react.bridge.Promise;
 import com.facebook.react.bridge.ReactApplicationContext;
 import com.facebook.react.bridge.ReactMethod;
 import com.facebook.react.bridge.ReadableMap;
+import com.facebook.react.bridge.ReadableArray;
 import com.facebook.react.module.annotations.ReactModule;
 import java.util.HashMap;
@@ -65,4 +66,29 @@ public class RNWhisperModule extends NativeRNWhisperSpec {
   public void releaseAllContexts(Promise promise) {
     rnwhisper.releaseAllContexts(promise);
   }
+  /*
+   * iOS-specific methods, left here to make the turbo module happy:
+   */
+  @ReactMethod
+  public void getAudioSessionCurrentCategory(Promise promise) { // iOS-only API: this implementation does no work.
+    promise.resolve(null); // Always resolves with null.
+  }
+  @ReactMethod
+  public void getAudioSessionCurrentMode(Promise promise) { // iOS-only API: this implementation does no work.
+    promise.resolve(null); // Always resolves with null.
+  }
+  @ReactMethod
+  public void setAudioSessionCategory(String category, ReadableArray options, Promise promise) { // iOS-only API: category and options are ignored here.
+    promise.resolve(null); // Always resolves with null.
+  }
+  @ReactMethod
+  public void setAudioSessionMode(String mode, Promise promise) { // iOS-only API: mode is ignored here.
+    promise.resolve(null); // Always resolves with null.
+  }
+  @ReactMethod
+  public void setAudioSessionActive(boolean active, Promise promise) { // iOS-only API: active is ignored here.
+    promise.resolve(null); // Always resolves with null.
+  }
 }

package/cpp/rn-whisper.cpp CHANGED Viewed

@@ -38,4 +38,55 @@ void rn_whisper_abort_all_transcribe() {
   }
 }
+// Filters `data` in place with the recurrence
+//   y = alpha * (y + data[i] - data[i - 1]),  alpha = dt / (rc + dt),
+// where rc = 1 / (2 * pi * cutoff) and dt = 1 / sample_rate.
+// data[0] is left unchanged and seeds y. An empty vector is a no-op.
+void high_pass_filter(std::vector<float> & data, float cutoff, float sample_rate) {
+    if (data.empty()) {
+        // Reading data[0] below would be out of bounds.
+        return;
+    }
+    const float rc = 1.0f / (2.0f * M_PI * cutoff);
+    const float dt = 1.0f / sample_rate;
+    const float alpha = dt / (rc + dt);
+    float y = data[0];
+    for (size_t i = 1; i < data.size(); i++) {
+        // data[i - 1] already holds the previous output here, because the
+        // vector is overwritten as the loop advances.
+        y = alpha * (y + data[i] - data[i - 1]);
+        data[i] = y;
+    }
+}
+// Compares the mean absolute amplitude of the last `last_ms` milliseconds with
+// the mean over the whole buffer.
+// Returns false when the buffer is not longer than that tail, or when the tail
+// mean exceeds vad_thold times the overall mean; returns true otherwise.
+// When freq_thold > 0 the buffer is first passed through high_pass_filter,
+// which modifies pcmf32 in place.
+bool rn_whisper_vad_simple(std::vector<float> & pcmf32, int sample_rate, int last_ms, float vad_thold, float freq_thold, bool verbose) {
+  const int total = pcmf32.size();
+  const int tail  = (sample_rate * last_ms) / 1000;
+  // not enough samples - assume no speech
+  if (tail >= total) {
+    return false;
+  }
+  if (freq_thold > 0.0f) {
+    high_pass_filter(pcmf32, freq_thold, sample_rate);
+  }
+  const int tail_begin = total - tail;
+  float energy_all  = 0.0f;
+  float energy_last = 0.0f;
+  for (int i = 0; i < total; i++) {
+    const float magnitude = fabsf(pcmf32[i]);
+    energy_all += magnitude;
+    if (i >= tail_begin) {
+      energy_last += magnitude;
+    }
+  }
+  energy_all  /= total;
+  energy_last /= tail;
+  if (verbose) {
+    fprintf(stderr, "%s: energy_all: %f, energy_last: %f, vad_thold: %f, freq_thold: %f\n", __func__, energy_all, energy_last, vad_thold, freq_thold);
+  }
+  return !(energy_last > vad_thold*energy_all);
+}
 }

package/cpp/rn-whisper.h CHANGED Viewed

@@ -10,7 +10,8 @@ void rn_whisper_remove_abort_map(int job_id);
 void rn_whisper_abort_transcribe(int job_id);
 bool rn_whisper_transcribe_is_aborted(int job_id);
 void rn_whisper_abort_all_transcribe();
+bool rn_whisper_vad_simple(std::vector<float> & pcmf32, int sample_rate, int last_ms, float vad_thold, float freq_thold, bool verbose);
 #ifdef __cplusplus
 }
-#endif
+#endif