whisper.rn 0.5.0-rc.0 → 0.5.0-rc.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +128 -50
- package/android/build.gradle +1 -0
- package/android/src/main/CMakeLists.txt +1 -0
- package/android/src/main/java/com/rnwhisper/RNWhisper.java +35 -0
- package/android/src/main/java/com/rnwhisper/WhisperContext.java +33 -0
- package/android/src/main/jni.cpp +81 -0
- package/android/src/newarch/java/com/rnwhisper/RNWhisperModule.java +5 -0
- package/android/src/oldarch/java/com/rnwhisper/RNWhisperModule.java +5 -0
- package/cpp/jsi/RNWhisperJSI.cpp +42 -6
- package/ios/RNWhisper.mm +11 -0
- package/ios/RNWhisperContext.h +1 -0
- package/ios/RNWhisperContext.mm +46 -0
- package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Info.plist +0 -0
- package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Info.plist +0 -0
- package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/_CodeSignature/CodeResources +1 -1
- package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/rnwhisper +0 -0
- package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Info.plist +0 -0
- package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Info.plist +0 -0
- package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/_CodeSignature/CodeResources +1 -1
- package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/rnwhisper +0 -0
- package/lib/commonjs/AudioSessionIos.js +2 -1
- package/lib/commonjs/AudioSessionIos.js.map +1 -1
- package/lib/commonjs/NativeRNWhisper.js.map +1 -1
- package/lib/commonjs/index.js +50 -10
- package/lib/commonjs/index.js.map +1 -1
- package/lib/commonjs/jest-mock.js +126 -0
- package/lib/commonjs/jest-mock.js.map +1 -0
- package/lib/commonjs/realtime-transcription/RealtimeTranscriber.js +857 -0
- package/lib/commonjs/realtime-transcription/RealtimeTranscriber.js.map +1 -0
- package/lib/commonjs/realtime-transcription/SliceManager.js +233 -0
- package/lib/commonjs/realtime-transcription/SliceManager.js.map +1 -0
- package/lib/commonjs/realtime-transcription/adapters/AudioPcmStreamAdapter.js +133 -0
- package/lib/commonjs/realtime-transcription/adapters/AudioPcmStreamAdapter.js.map +1 -0
- package/lib/commonjs/realtime-transcription/adapters/JestAudioStreamAdapter.js +201 -0
- package/lib/commonjs/realtime-transcription/adapters/JestAudioStreamAdapter.js.map +1 -0
- package/lib/commonjs/realtime-transcription/adapters/SimulateFileAudioStreamAdapter.js +309 -0
- package/lib/commonjs/realtime-transcription/adapters/SimulateFileAudioStreamAdapter.js.map +1 -0
- package/lib/commonjs/realtime-transcription/index.js +27 -0
- package/lib/commonjs/realtime-transcription/index.js.map +1 -0
- package/lib/commonjs/realtime-transcription/types.js +114 -0
- package/lib/commonjs/realtime-transcription/types.js.map +1 -0
- package/lib/commonjs/utils/WavFileReader.js +158 -0
- package/lib/commonjs/utils/WavFileReader.js.map +1 -0
- package/lib/commonjs/utils/WavFileWriter.js +181 -0
- package/lib/commonjs/utils/WavFileWriter.js.map +1 -0
- package/lib/commonjs/utils/common.js +25 -0
- package/lib/commonjs/utils/common.js.map +1 -0
- package/lib/module/AudioSessionIos.js +2 -1
- package/lib/module/AudioSessionIos.js.map +1 -1
- package/lib/module/NativeRNWhisper.js.map +1 -1
- package/lib/module/index.js +48 -10
- package/lib/module/index.js.map +1 -1
- package/lib/module/jest-mock.js +124 -0
- package/lib/module/jest-mock.js.map +1 -0
- package/lib/module/realtime-transcription/RealtimeTranscriber.js +851 -0
- package/lib/module/realtime-transcription/RealtimeTranscriber.js.map +1 -0
- package/lib/module/realtime-transcription/SliceManager.js +226 -0
- package/lib/module/realtime-transcription/SliceManager.js.map +1 -0
- package/lib/module/realtime-transcription/adapters/AudioPcmStreamAdapter.js +124 -0
- package/lib/module/realtime-transcription/adapters/AudioPcmStreamAdapter.js.map +1 -0
- package/lib/module/realtime-transcription/adapters/JestAudioStreamAdapter.js +194 -0
- package/lib/module/realtime-transcription/adapters/JestAudioStreamAdapter.js.map +1 -0
- package/lib/module/realtime-transcription/adapters/SimulateFileAudioStreamAdapter.js +302 -0
- package/lib/module/realtime-transcription/adapters/SimulateFileAudioStreamAdapter.js.map +1 -0
- package/lib/module/realtime-transcription/index.js +8 -0
- package/lib/module/realtime-transcription/index.js.map +1 -0
- package/lib/module/realtime-transcription/types.js +107 -0
- package/lib/module/realtime-transcription/types.js.map +1 -0
- package/lib/module/utils/WavFileReader.js +151 -0
- package/lib/module/utils/WavFileReader.js.map +1 -0
- package/lib/module/utils/WavFileWriter.js +174 -0
- package/lib/module/utils/WavFileWriter.js.map +1 -0
- package/lib/module/utils/common.js +18 -0
- package/lib/module/utils/common.js.map +1 -0
- package/lib/typescript/AudioSessionIos.d.ts +1 -1
- package/lib/typescript/AudioSessionIos.d.ts.map +1 -1
- package/lib/typescript/NativeRNWhisper.d.ts +1 -0
- package/lib/typescript/NativeRNWhisper.d.ts.map +1 -1
- package/lib/typescript/index.d.ts +8 -4
- package/lib/typescript/index.d.ts.map +1 -1
- package/lib/typescript/jest-mock.d.ts +2 -0
- package/lib/typescript/jest-mock.d.ts.map +1 -0
- package/lib/typescript/realtime-transcription/RealtimeTranscriber.d.ts +166 -0
- package/lib/typescript/realtime-transcription/RealtimeTranscriber.d.ts.map +1 -0
- package/lib/typescript/realtime-transcription/SliceManager.d.ts +72 -0
- package/lib/typescript/realtime-transcription/SliceManager.d.ts.map +1 -0
- package/lib/typescript/realtime-transcription/adapters/AudioPcmStreamAdapter.d.ts +22 -0
- package/lib/typescript/realtime-transcription/adapters/AudioPcmStreamAdapter.d.ts.map +1 -0
- package/lib/typescript/realtime-transcription/adapters/JestAudioStreamAdapter.d.ts +44 -0
- package/lib/typescript/realtime-transcription/adapters/JestAudioStreamAdapter.d.ts.map +1 -0
- package/lib/typescript/realtime-transcription/adapters/SimulateFileAudioStreamAdapter.d.ts +75 -0
- package/lib/typescript/realtime-transcription/adapters/SimulateFileAudioStreamAdapter.d.ts.map +1 -0
- package/lib/typescript/realtime-transcription/index.d.ts +6 -0
- package/lib/typescript/realtime-transcription/index.d.ts.map +1 -0
- package/lib/typescript/realtime-transcription/types.d.ts +222 -0
- package/lib/typescript/realtime-transcription/types.d.ts.map +1 -0
- package/lib/typescript/utils/WavFileReader.d.ts +61 -0
- package/lib/typescript/utils/WavFileReader.d.ts.map +1 -0
- package/lib/typescript/utils/WavFileWriter.d.ts +57 -0
- package/lib/typescript/utils/WavFileWriter.d.ts.map +1 -0
- package/lib/typescript/utils/common.d.ts +9 -0
- package/lib/typescript/utils/common.d.ts.map +1 -0
- package/package.json +23 -11
- package/src/AudioSessionIos.ts +3 -2
- package/src/NativeRNWhisper.ts +2 -0
- package/src/index.ts +74 -22
- package/{jest/mock.js → src/jest-mock.ts} +2 -2
- package/src/realtime-transcription/RealtimeTranscriber.ts +1015 -0
- package/src/realtime-transcription/SliceManager.ts +252 -0
- package/src/realtime-transcription/adapters/AudioPcmStreamAdapter.ts +143 -0
- package/src/realtime-transcription/adapters/JestAudioStreamAdapter.ts +251 -0
- package/src/realtime-transcription/adapters/SimulateFileAudioStreamAdapter.ts +378 -0
- package/src/realtime-transcription/index.ts +34 -0
- package/src/realtime-transcription/types.ts +283 -0
- package/src/utils/WavFileReader.ts +202 -0
- package/src/utils/WavFileWriter.ts +206 -0
- package/src/utils/common.ts +17 -0
package/README.md
CHANGED

````diff
@@ -11,9 +11,9 @@ React Native binding of [whisper.cpp](https://github.com/ggerganov/whisper.cpp).
 ## Screenshots
 
 | <img src="https://github.com/mybigday/whisper.rn/assets/3001525/2fea7b2d-c911-44fb-9afc-8efc7b594446" width="300" /> | <img src="https://github.com/mybigday/whisper.rn/assets/3001525/a5005a6c-44f7-4db9-95e8-0fd951a2e147" width="300" /> |
-… (3 removed lines not captured in this view)
+| :------------------------------------------------------------------------------------------------------------------: | :------------------------------------------------------------------------------------------------------------------: |
+| iOS: Tested on iPhone 13 Pro Max | Android: Tested on Pixel 6 |
+| (tiny.en, Core ML enabled, release mode + archive) | (tiny.en, armv8.2-a+fp16, release mode) |
 
 ## Installation
 
@@ -49,7 +49,9 @@ You will need to prebuild the project before using it. See [Expo guide](https://
 If you want to use realtime transcribe, you need to add the microphone permission to your app.
 
 ### iOS
-Add these lines to ```ios/[YOU_APP_NAME]/info.plist```
+
+Add these lines to `ios/[YOU_APP_NAME]/info.plist`
+
 ```xml
 <key>NSMicrophoneUsageDescription</key>
 <string>This app requires microphone access in order to transcribe speech</string>
@@ -58,10 +60,13 @@ Add these lines to ```ios/[YOU_APP_NAME]/info.plist```
 For tvOS, please note that the microphone is not supported.
 
 ### Android
-… (1 removed line not captured in this view)
+
+Add the following line to `android/app/src/main/AndroidManifest.xml`
+
 ```xml
 <uses-permission android:name="android.permission.RECORD_AUDIO" />
 ```
+
 ## Tips & Tricks
 
 The [Tips & Tricks](docs/TIPS.md) document is a collection of tips and tricks for using `whisper.rn`.
@@ -83,24 +88,6 @@ const { result } = await promise
 // result: (The inference text result from audio file)
 ```
 
-Use realtime transcribe:
-
-```js
-const { stop, subscribe } = await whisperContext.transcribeRealtime(options)
-
-subscribe(evt => {
-  const { isCapturing, data, processTime, recordingTime } = evt
-  console.log(
-    `Realtime transcribing: ${isCapturing ? 'ON' : 'OFF'}\n` +
-      // The inference text result from audio record:
-      `Result: ${data.result}\n\n` +
-      `Process time: ${processTime}ms\n` +
-      `Recording time: ${recordingTime}ms`,
-  )
-  if (!isCapturing) console.log('Finished realtime transcribing')
-})
-```
-
 ## Voice Activity Detection (VAD)
 
 Voice Activity Detection allows you to detect speech segments in audio data using the Silero VAD model.
@@ -157,7 +144,11 @@ const segments = await vadContext.detectSpeechData(base64AudioData, {
 
 ```typescript
 segments.forEach((segment, index) => {
-  console.log(
+  console.log(
+    `Segment ${index + 1}: ${segment.t0.toFixed(2)}s - ${segment.t1.toFixed(
+      2,
+    )}s`,
+  )
   console.log(`Duration: ${(segment.t1 - segment.t0).toFixed(2)}s`)
 })
 ```
@@ -170,35 +161,57 @@ await vadContext.release()
 await releaseAllWhisperVad()
 ```
 
-… (1 removed line not captured in this view)
+## Realtime Transcription
+
+The new `RealtimeTranscriber` provides enhanced realtime transcription with features like Voice Activity Detection (VAD), auto-slicing, and memory management.
 
-Option 1 - Use options in transcribeRealtime:
 ```js
-… (1 removed line not captured in this view)
+// If your RN packager is not enable package exports support, use whisper.rn/src/realtime-transcription
+import { RealtimeTranscriber } from 'whisper.rn/realtime-transcription'
+import { AudioPcmStreamAdapter } from 'whisper.rn/realtime-transcription/adapters'
+import RNFS from 'react-native-fs' // or any compatible filesystem
 
-… (6 removed lines not captured; this example is re-added under "Deprecated APIs" below)
-  audioSessionOnStopIos: 'restore', // Or an AudioSessionSettingIos
+// Dependencies
+const whisperContext = await initWhisper({
+  /* ... */
+})
+const vadContext = await initWhisperVad({
+  /* ... */
 })
+const audioStream = new AudioPcmStreamAdapter() // requires @fugood/react-native-audio-pcm-stream
+
+// Create transcriber
+const transcriber = new RealtimeTranscriber(
+  { whisperContext, vadContext, audioStream, fs: RNFS },
+  {
+    audioSliceSec: 30,
+    vadPreset: 'default',
+    autoSliceOnSpeechEnd: true,
+    transcribeOptions: { language: 'en' },
+  },
+  {
+    onTranscribe: (event) => console.log('Transcription:', event.data?.result),
+    onVad: (event) => console.log('VAD:', event.type, event.confidence),
+    onStatusChange: (isActive) =>
+      console.log('Status:', isActive ? 'ACTIVE' : 'INACTIVE'),
+    onError: (error) => console.error('Error:', error),
+  },
+)
+
+// Start/stop transcription
+await transcriber.start()
+await transcriber.stop()
 ```
 
-… (1 removed line not captured in this view)
-```js
-import { AudioSessionIos } from 'whisper.rn'
+**Dependencies:**
 
-… (2 removed lines not captured in this view)
-)
-await AudioSessionIos.setMode(AudioSessionIos.Mode.Default)
-await AudioSessionIos.setActive(true)
-// Then you can start do recording
-```
+- `@fugood/react-native-audio-pcm-stream` for `AudioPcmStreamAdapter`
+- Compatible filesystem module (e.g., `react-native-fs`). See [filesystem interface](src/utils/WavFileWriter.ts#L9-L16) for TypeScript definition
 
-… (1 removed line not captured in this view)
+**Custom Audio Adapters:**
+You can create custom audio stream adapters by implementing the [AudioStreamInterface](src/realtime-transcription/types.ts#L21-L30). This allows integration with different audio sources or custom audio processing pipelines.
+
+**Example:** See [complete example](example/src/RealtimeTranscriber.tsx) for full implementation including file simulation and UI.
 
 Please visit the [Documentation](docs/) for more details.
 
@@ -213,8 +226,10 @@ const whisperContext = await initWhisper({
   filePath: require('../assets/ggml-tiny.en.bin'),
 })
 
-const { stop, promise } =
-… (1 removed line not captured in this view)
+const { stop, promise } = whisperContext.transcribe(
+  require('../assets/sample.wav'),
+  options,
+)
 
 // ...
 ```
@@ -233,18 +248,19 @@ module.exports = {
       ...defaultAssetExts,
       'bin', // whisper.rn: ggml model binary
       'mil', // whisper.rn: CoreML model asset
-    ]
+    ],
   },
 }
 ```
 
 Please note that:
+
 - It will significantly increase the size of the app in release mode.
 - The RN packager is not allowed file size larger than 2GB, so it not able to use original f16 `large` model (2.9GB), you can use quantized models instead.
 
 ## Core ML support
 
-… (1 removed line not captured in this view)
+**_Platform: iOS 15.0+, tvOS 15.0+_**
 
 To use Core ML on iOS, you will need to have the Core ML model files.
 
@@ -301,9 +317,71 @@ Please follow the [Development Workflow section of contributing guide](./CONTRIB
 We have provided a mock version of `whisper.rn` for testing purpose you can use on Jest:
 
 ```js
-jest.mock('whisper.rn', () => require('whisper.rn/jest
+jest.mock('whisper.rn', () => require('whisper.rn/jest-mock'))
+```
+
+## Deprecated APIs
+
+### `transcribeRealtime` (Deprecated)
+
+> ⚠️ **Deprecated**: Use `RealtimeTranscriber` instead for enhanced features and better performance.
+
+```js
+const { stop, subscribe } = await whisperContext.transcribeRealtime(options)
+
+subscribe((evt) => {
+  const { isCapturing, data, processTime, recordingTime } = evt
+  console.log(
+    `Realtime transcribing: ${isCapturing ? 'ON' : 'OFF'}\n` +
+      `Result: ${data.result}\n\n` +
+      `Process time: ${processTime}ms\n` +
+      `Recording time: ${recordingTime}ms`,
+  )
+  if (!isCapturing) console.log('Finished realtime transcribing')
+})
 ```
 
+In iOS, You may need to change the Audio Session so that it can be used with other audio playback, or to optimize the quality of the recording. So we have provided AudioSession utilities for you:
+
+Option 1 - Use options in transcribeRealtime:
+
+```js
+import { AudioSessionIos } from 'whisper.rn'
+
+const { stop, subscribe } = await whisperContext.transcribeRealtime({
+  audioSessionOnStartIos: {
+    category: AudioSessionIos.Category.PlayAndRecord,
+    options: [AudioSessionIos.CategoryOption.MixWithOthers],
+    mode: AudioSessionIos.Mode.Default,
+  },
+  audioSessionOnStopIos: 'restore', // Or an AudioSessionSettingIos
+})
+```
+
+Option 2 - Manage the Audio Session in anywhere:
+
+```js
+import { AudioSessionIos } from 'whisper.rn'
+
+await AudioSessionIos.setCategory(AudioSessionIos.Category.PlayAndRecord, [
+  AudioSessionIos.CategoryOption.MixWithOthers,
+])
+await AudioSessionIos.setMode(AudioSessionIos.Mode.Default)
+await AudioSessionIos.setActive(true)
+// Then you can start do recording
+```
+
+In Android, you may need to request the microphone permission by [`PermissionAndroid`](https://reactnative.dev/docs/permissionsandroid).
+
+## Apps using `whisper.rn`
+
+- [BRICKS](https://bricks.tools): Our product for building interactive signage in simple way. We provide LLM functions as Generator LLM/Assistant.
+- ... (Any Contribution is welcome)
+
+## Node.js binding
+
+- [whisper.node](https://github.com/mybigday/whisper.node): An another Node.js binding of `whisper.cpp` but made API same as `whisper.rn`.
+
 ## Contributing
 
 See the [contributing guide](CONTRIBUTING.md) to learn how to contribute to the repository and the development workflow.
````
package/android/build.gradle
CHANGED

(one-line addition; hunk not captured in this view)

package/android/src/main/CMakeLists.txt
CHANGED

```diff
@@ -110,6 +110,7 @@ build_library("rnwhisper" "generic" "")
 
 if (${ANDROID_ABI} STREQUAL "arm64-v8a")
     build_library("rnwhisper_v8fp16_va_2" "arm" "-march=armv8.2-a+fp16")
+    build_library("rnwhisper_v8" "arm" "-march=armv8-a")
 elseif (${ANDROID_ABI} STREQUAL "armeabi-v7a")
     build_library("rnwhisper_vfpv4" "arm" "-mfpu=neon-vfpv4")
 elseif (${ANDROID_ABI} STREQUAL "x86_64")
```
package/android/src/main/java/com/rnwhisper/RNWhisper.java
CHANGED

```diff
@@ -64,6 +64,10 @@ public class RNWhisper implements LifecycleEventListener {
   }
 
   public void installJSIBindings(Promise promise) {
+    if (!WhisperContext.isNativeLibraryLoaded()) {
+      promise.reject("Native library not loaded");
+      return;
+    }
 
     AsyncTask task = new AsyncTask<Void, Void, Void>() {
       private Exception exception;
@@ -95,6 +99,37 @@ public class RNWhisper implements LifecycleEventListener {
     tasks.put(task, "installJSIBindings");
   }
 
+  public void toggleNativeLog(boolean enabled, Promise promise) {
+    if (!WhisperContext.isNativeLibraryLoaded()) {
+      promise.reject("Native library not loaded");
+      return;
+    }
+
+    new AsyncTask<Void, Void, Boolean>() {
+      private Exception exception;
+
+      @Override
+      protected Boolean doInBackground(Void... voids) {
+        try {
+          WhisperContext.toggleNativeLog(reactContext, enabled);
+          return true;
+        } catch (Exception e) {
+          exception = e;
+        }
+        return null;
+      }
+
+      @Override
+      protected void onPostExecute(Boolean result) {
+        if (exception != null) {
+          promise.reject(exception);
+          return;
+        }
+        promise.resolve(result);
+      }
+    }.executeOnExecutor(AsyncTask.THREAD_POOL_EXECUTOR);
+  }
+
   private int getResourceIdentifier(String filePath) {
     int identifier = reactContext.getResources().getIdentifier(
       filePath,
```
package/android/src/main/java/com/rnwhisper/WhisperContext.java
CHANGED

```diff
@@ -29,6 +29,29 @@ public class WhisperContext {
 
   private static String loadedLibrary = "";
 
+  private static class NativeLogCallback {
+    DeviceEventManagerModule.RCTDeviceEventEmitter eventEmitter;
+
+    public NativeLogCallback(ReactApplicationContext reactContext) {
+      this.eventEmitter = reactContext.getJSModule(DeviceEventManagerModule.RCTDeviceEventEmitter.class);
+    }
+
+    void emitNativeLog(String level, String text) {
+      WritableMap event = Arguments.createMap();
+      event.putString("level", level);
+      event.putString("text", text);
+      eventEmitter.emit("@RNWhisper_onNativeLog", event);
+    }
+  }
+
+  static void toggleNativeLog(ReactApplicationContext reactContext, boolean enabled) {
+    if (enabled) {
+      setupLog(new NativeLogCallback(reactContext));
+    } else {
+      unsetLog();
+    }
+  }
+
   private static final int SAMPLE_RATE = 16000;
   private static final int CHANNEL_CONFIG = AudioFormat.CHANNEL_IN_MONO;
   private static final int AUDIO_FORMAT = AudioFormat.ENCODING_PCM_16BIT;
@@ -454,6 +477,10 @@ public class WhisperContext {
       Log.d(NAME, "Loading librnwhisper_v8fp16_va_2.so");
       System.loadLibrary("rnwhisper_v8fp16_va_2");
       loadedLibrary = "rnwhisper_v8fp16_va_2";
+    } else {
+      Log.d(NAME, "Loading librnwhisper_v8.so");
+      System.loadLibrary("rnwhisper_v8");
+      loadedLibrary = "rnwhisper_v8";
     }
   } else if (WhisperContext.isArmeabiV7a()) {
     Log.d(NAME, "Loading librnwhisper_vfpv4.so");
@@ -468,6 +495,10 @@ public class WhisperContext {
     }
   }
 
+  public static boolean isNativeLibraryLoaded() {
+    return loadedLibrary != "";
+  }
+
   public static boolean isArm64V8a() {
     return Build.SUPPORTED_ABIS[0].equals("arm64-v8a");
   }
@@ -571,4 +602,6 @@ public class WhisperContext {
   // JSI Installation
   protected static native void installJSIBindings(long runtimePtr, Object callInvokerHolder);
   protected static native void cleanupJSIBindings();
+  protected static native void setupLog(NativeLogCallback logCallback);
+  protected static native void unsetLog();
 }
```
package/android/src/main/jni.cpp
CHANGED

```diff
@@ -23,6 +23,64 @@
 #define LOGI(...) __android_log_print(ANDROID_LOG_INFO, TAG, __VA_ARGS__)
 #define LOGW(...) __android_log_print(ANDROID_LOG_WARN, TAG, __VA_ARGS__)
 
+struct log_callback_context {
+    JavaVM *jvm;
+    jobject callback;
+};
+
+static void rnwhisper_log_callback_default(enum wsp_ggml_log_level level, const char * fmt, void * data) {
+    if (level == WSP_GGML_LOG_LEVEL_ERROR) __android_log_print(ANDROID_LOG_ERROR, TAG, fmt, data);
+    else if (level == WSP_GGML_LOG_LEVEL_INFO) __android_log_print(ANDROID_LOG_INFO, TAG, fmt, data);
+    else if (level == WSP_GGML_LOG_LEVEL_WARN) __android_log_print(ANDROID_LOG_WARN, TAG, fmt, data);
+    else __android_log_print(ANDROID_LOG_DEFAULT, TAG, fmt, data);
+}
+
+static void rnwhisper_log_callback_to_j(enum wsp_ggml_log_level level, const char * text, void * data) {
+    const char* level_c = "";
+    if (level == WSP_GGML_LOG_LEVEL_ERROR) {
+        __android_log_print(ANDROID_LOG_ERROR, TAG, text, nullptr);
+        level_c = "error";
+    } else if (level == WSP_GGML_LOG_LEVEL_INFO) {
+        __android_log_print(ANDROID_LOG_INFO, TAG, text, nullptr);
+        level_c = "info";
+    } else if (level == WSP_GGML_LOG_LEVEL_WARN) {
+        __android_log_print(ANDROID_LOG_WARN, TAG, text, nullptr);
+        level_c = "warn";
+    } else {
+        __android_log_print(ANDROID_LOG_DEFAULT, TAG, text, nullptr);
+    }
+
+    log_callback_context *cb_ctx = (log_callback_context *) data;
+
+    JNIEnv *env;
+    bool need_detach = false;
+    int getEnvResult = cb_ctx->jvm->GetEnv((void**)&env, JNI_VERSION_1_6);
+
+    if (getEnvResult == JNI_EDETACHED) {
+        if (cb_ctx->jvm->AttachCurrentThread(&env, nullptr) == JNI_OK) {
+            need_detach = true;
+        } else {
+            return;
+        }
+    } else if (getEnvResult != JNI_OK) {
+        return;
+    }
+
+    jobject callback = cb_ctx->callback;
+    jclass cb_class = env->GetObjectClass(callback);
+    jmethodID emitNativeLog = env->GetMethodID(cb_class, "emitNativeLog", "(Ljava/lang/String;Ljava/lang/String;)V");
+
+    jstring level_str = env->NewStringUTF(level_c);
+    jstring text_str = env->NewStringUTF(text);
+    env->CallVoidMethod(callback, emitNativeLog, level_str, text_str);
+    env->DeleteLocalRef(level_str);
+    env->DeleteLocalRef(text_str);
+
+    if (need_detach) {
+        cb_ctx->jvm->DetachCurrentThread();
+    }
+}
+
 static inline int min(int a, int b) {
     return (a < b) ? a : b;
 }
@@ -800,7 +858,30 @@ Java_com_rnwhisper_WhisperContext_cleanupJSIBindings(
     JNIEnv *env,
     jclass clazz
 ) {
+    UNUSED(env);
+    UNUSED(clazz);
     rnwhisper_jsi::cleanupJSIBindings();
 }
 
+JNIEXPORT void JNICALL
+Java_com_rnwhisper_WhisperContext_setupLog(JNIEnv *env, jobject thiz, jobject logCallback) {
+    UNUSED(thiz);
+
+    log_callback_context *cb_ctx = new log_callback_context;
+
+    JavaVM *jvm;
+    env->GetJavaVM(&jvm);
+    cb_ctx->jvm = jvm;
+    cb_ctx->callback = env->NewGlobalRef(logCallback);
+
+    whisper_log_set(rnwhisper_log_callback_to_j, cb_ctx);
+}
+
+JNIEXPORT void JNICALL
+Java_com_rnwhisper_WhisperContext_unsetLog(JNIEnv *env, jobject thiz) {
+    UNUSED(env);
+    UNUSED(thiz);
+    whisper_log_set(rnwhisper_log_callback_default, NULL);
+}
+
 } // extern "C"
```
package/android/src/newarch/java/com/rnwhisper/RNWhisperModule.java
CHANGED

```diff
@@ -31,6 +31,11 @@ public class RNWhisperModule extends NativeRNWhisperSpec {
     rnwhisper.installJSIBindings(promise);
   }
 
+  @ReactMethod
+  public void toggleNativeLog(boolean enabled, Promise promise) {
+    rnwhisper.toggleNativeLog(enabled, promise);
+  }
+
   @Override
   @NonNull
   public String getName() {
```

package/android/src/oldarch/java/com/rnwhisper/RNWhisperModule.java
CHANGED

```diff
@@ -31,6 +31,11 @@ public class RNWhisperModule extends ReactContextBaseJavaModule {
     rnwhisper.installJSIBindings(promise);
   }
 
+  @ReactMethod
+  public void toggleNativeLog(boolean enabled, Promise promise) {
+    rnwhisper.toggleNativeLog(enabled, promise);
+  }
+
   @Override
   @NonNull
   public String getName() {
```
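Both module flavors forward the new `toggleNativeLog` method to the shared `RNWhisper` implementation, which (via `WhisperContext.NativeLogCallback` above) relays whisper.cpp log lines to JS as `@RNWhisper_onNativeLog` events with a `{ level, text }` payload. A minimal consumer sketch, assuming the module is reachable as `NativeModules.RNWhisper`; the package may also expose a higher-level wrapper in `src/index.ts`, which this section does not show.

```typescript
import { NativeEventEmitter, NativeModules } from 'react-native'

// Event name and payload shape are taken verbatim from the diff above.
export async function enableWhisperNativeLogs(): Promise<() => Promise<void>> {
  const emitter = new NativeEventEmitter(NativeModules.RNWhisper)
  const sub = emitter.addListener(
    '@RNWhisper_onNativeLog',
    ({ level, text }: { level: string; text: string }) => {
      console.log(`[whisper:${level}] ${text}`)
    },
  )
  await NativeModules.RNWhisper.toggleNativeLog(true)

  // Cleanup function: disable native logging and unsubscribe.
  return async () => {
    await NativeModules.RNWhisper.toggleNativeLog(false)
    sub.remove()
  }
}
```

Note that on Android `toggleNativeLog` resolves a promise once the background task completes, while the iOS export (`RCT_EXPORT_METHOD(toggleNativeLog:(BOOL)enabled)`, below) takes no promise, so the `await` there is a no-op.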
package/cpp/jsi/RNWhisperJSI.cpp
CHANGED

```diff
@@ -295,6 +295,38 @@ CallbackInfo extractCallbacks(Runtime& runtime, const Object& optionsObj) {
     return info;
 }
 
+// Helper function to extract VAD parameters from options
+whisper_vad_params extractVadParams(Runtime& runtime, const Object& optionsObj) {
+    whisper_vad_params vadParams = whisper_vad_default_params();
+
+    try {
+        auto propNames = optionsObj.getPropertyNames(runtime);
+        for (size_t i = 0; i < propNames.size(runtime); i++) {
+            auto propNameValue = propNames.getValueAtIndex(runtime, i);
+            std::string propName = propNameValue.getString(runtime).utf8(runtime);
+            Value propValue = optionsObj.getProperty(runtime, propNameValue.getString(runtime));
+
+            if (propName == "threshold" && propValue.isNumber()) {
+                vadParams.threshold = (float)propValue.getNumber();
+            } else if (propName == "minSpeechDurationMs" && propValue.isNumber()) {
+                vadParams.min_speech_duration_ms = (int)propValue.getNumber();
+            } else if (propName == "minSilenceDurationMs" && propValue.isNumber()) {
+                vadParams.min_silence_duration_ms = (int)propValue.getNumber();
+            } else if (propName == "maxSpeechDurationS" && propValue.isNumber()) {
+                vadParams.max_speech_duration_s = (float)propValue.getNumber();
+            } else if (propName == "speechPadMs" && propValue.isNumber()) {
+                vadParams.speech_pad_ms = (int)propValue.getNumber();
+            } else if (propName == "samplesOverlap" && propValue.isNumber()) {
+                vadParams.samples_overlap = (float)propValue.getNumber();
+            }
+        }
+    } catch (...) {
+        // Ignore parameter extraction errors
+    }
+
+    return vadParams;
+}
+
 // Helper function to create segments array
 Array createSegmentsArray(Runtime& runtime, struct whisper_context* ctx, int offset) {
     int n_segments = whisper_full_n_segments(ctx);
@@ -355,10 +387,13 @@ Value createPromiseTask(
 
     whisper_full_params params = {};
     CallbackInfo callbackInfo = {};
+    whisper_vad_params vadParams = {};
     if (functionName == "whisperTranscribeData") {
         params = createFullParamsFromJSI(runtime, optionsObj);
         // Extract data from optionsObj before lambda capture
        callbackInfo = extractCallbacks(runtime, optionsObj);
+    } else if (functionName == "whisperVadDetectSpeech") {
+        vadParams = extractVadParams(runtime, optionsObj);
     }
 
     // Create promise
@@ -368,7 +403,7 @@ Value createPromiseTask(
         runtime,
         PropNameID::forAscii(runtime, ""),
         2, // resolve, reject
-        [contextId, audioResult, params, callbackInfo, task, callInvoker, functionName](Runtime& runtime, const Value& thisValue, const Value* arguments, size_t count) -> Value {
+        [contextId, audioResult, params, callbackInfo, vadParams, task, callInvoker, functionName](Runtime& runtime, const Value& thisValue, const Value* arguments, size_t count) -> Value {
             if (count != 2) {
                 throw JSError(runtime, "Promise executor expects 2 arguments (resolve, reject)");
             }
@@ -379,10 +414,10 @@ Value createPromiseTask(
 
             // Execute task in ThreadPool
             auto future = getWhisperThreadPool().enqueue([
-                contextId, audioResult, params, callbackInfo, task, resolvePtr, rejectPtr, callInvoker, safeRuntime, functionName]() {
+                contextId, audioResult, params, callbackInfo, vadParams, task, resolvePtr, rejectPtr, callInvoker, safeRuntime, functionName]() {
 
                 try {
-                    task(contextId, audioResult, params, callbackInfo, resolvePtr, rejectPtr, callInvoker, safeRuntime);
+                    task(contextId, audioResult, params, callbackInfo, vadParams, resolvePtr, rejectPtr, callInvoker, safeRuntime);
                 } catch (...) {
                     callInvoker->invokeAsync([rejectPtr, safeRuntime, functionName]() {
                         auto& runtime = *safeRuntime;
@@ -413,7 +448,7 @@ void installJSIBindings(
     try {
         return createPromiseTask<whisper_context>(
             runtime, "whisperTranscribeData", callInvoker, arguments, count,
-            [](int contextId, const AudioData& audioResult, const whisper_full_params& params, const CallbackInfo& callbackInfo,
+            [](int contextId, const AudioData& audioResult, const whisper_full_params& params, const CallbackInfo& callbackInfo, const whisper_vad_params& vadParams,
                std::shared_ptr<Function> resolvePtr, std::shared_ptr<Function> rejectPtr,
                std::shared_ptr<facebook::react::CallInvoker> callInvoker,
                std::shared_ptr<Runtime> safeRuntime) {
@@ -566,7 +601,7 @@ void installJSIBindings(
     try {
         return createPromiseTask<whisper_vad_context>(
             runtime, "whisperVadDetectSpeech", callInvoker, arguments, count,
-            [](int contextId, const AudioData& audioResult, const whisper_full_params& params, const CallbackInfo& callbackInfo,
+            [](int contextId, const AudioData& audioResult, const whisper_full_params& params, const CallbackInfo& callbackInfo, const whisper_vad_params& vadParams,
               std::shared_ptr<Function> resolvePtr, std::shared_ptr<Function> rejectPtr,
               std::shared_ptr<facebook::react::CallInvoker> callInvoker,
               std::shared_ptr<Runtime> safeRuntime) {
@@ -600,7 +635,8 @@ void installJSIBindings(
                 bool isSpeech = whisper_vad_detect_speech(vadContext, audioResult.data.data(), audioResult.count);
                 logInfo("VAD detection result: %s", isSpeech ? "speech" : "no speech");
 
-                struct whisper_vad_params vad_params =
+                struct whisper_vad_params vad_params = vadParams;
+
                 struct whisper_vad_segments* segments = nullptr;
                 if (isSpeech) {
                     segments = whisper_vad_segments_from_probs(vadContext, vad_params);
```
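`extractVadParams` maps camelCase option keys from JS onto whisper.cpp's `whisper_vad_params`, starting from `whisper_vad_default_params()` so omitted keys keep their defaults, and silently skipping non-numeric or unknown keys. A sketch of the matching JS options object follows; the key names are verbatim from the hunk above, while the values are only examples and the call shape follows the README's `vadContext.detectSpeechData(base64AudioData, { ... })` snippet.

```typescript
// Keys mirror extractVadParams above; values are illustrative, not the defaults.
const vadOptions = {
  threshold: 0.5, // -> vadParams.threshold (float)
  minSpeechDurationMs: 250, // -> min_speech_duration_ms (int)
  minSilenceDurationMs: 100, // -> min_silence_duration_ms (int)
  maxSpeechDurationS: 30, // -> max_speech_duration_s (float)
  speechPadMs: 30, // -> speech_pad_ms (int)
  samplesOverlap: 0.1, // -> samples_overlap (float)
}

// As in the README VAD example; non-numeric or unknown keys are ignored natively.
const segments = await vadContext.detectSpeechData(base64AudioData, vadOptions)
```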
package/ios/RNWhisper.mm
CHANGED

```diff
@@ -24,6 +24,16 @@ RCT_EXPORT_MODULE()
     return NO;
 }
 
+RCT_EXPORT_METHOD(toggleNativeLog:(BOOL)enabled) {
+    void (^onEmitLog)(NSString *level, NSString *text) = nil;
+    if (enabled) {
+        onEmitLog = ^(NSString *level, NSString *text) {
+            [self sendEventWithName:@"@RNWhisper_onNativeLog" body:@{ @"level": level, @"text": text }];
+        };
+    }
+    [RNWhisperContext toggleNativeLog:enabled onEmitLog:onEmitLog];
+}
+
 - (NSDictionary *)constantsToExport
 {
     return @{
@@ -107,6 +117,7 @@ RCT_REMAP_METHOD(initContext,
         @"@RNWhisper_onTranscribeNewSegments",
         @"@RNWhisper_onRealtimeTranscribe",
         @"@RNWhisper_onRealtimeTranscribeEnd",
+        @"@RNWhisper_onNativeLog",
     ];
 }
 
```
package/ios/RNWhisperContext.h
CHANGED

```diff
@@ -47,6 +47,7 @@ typedef struct {
     bool isMetalEnabled;
 }
 
++ (void)toggleNativeLog:(BOOL)enabled onEmitLog:(void (^)(NSString *level, NSString *text))onEmitLog;
 + (instancetype)initWithModelPath:(NSString *)modelPath contextId:(int)contextId noCoreML:(BOOL)noCoreML noMetal:(BOOL)noMetal useFlashAttn:(BOOL)useFlashAttn;
 - (bool)isMetalEnabled;
 - (NSString *)reasonNoMetal;
```