whisper.rn 0.3.7 → 0.3.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (34) hide show
  1. package/README.md +28 -0
  2. package/android/src/main/java/com/rnwhisper/AudioUtils.java +119 -0
  3. package/android/src/main/java/com/rnwhisper/WhisperContext.java +37 -116
  4. package/android/src/main/jni.cpp +23 -12
  5. package/android/src/newarch/java/com/rnwhisper/RNWhisperModule.java +26 -0
  6. package/ios/RNWhisper.mm +81 -22
  7. package/ios/RNWhisper.xcodeproj/project.pbxproj +6 -0
  8. package/ios/RNWhisper.xcodeproj/project.xcworkspace/xcuserdata/jhen.xcuserdatad/UserInterfaceState.xcuserstate +0 -0
  9. package/ios/RNWhisperAudioSessionUtils.h +13 -0
  10. package/ios/RNWhisperAudioSessionUtils.m +85 -0
  11. package/ios/RNWhisperAudioUtils.h +1 -0
  12. package/ios/RNWhisperAudioUtils.m +21 -0
  13. package/ios/RNWhisperContext.h +1 -0
  14. package/ios/RNWhisperContext.mm +56 -28
  15. package/lib/commonjs/AudioSessionIos.js +91 -0
  16. package/lib/commonjs/AudioSessionIos.js.map +1 -0
  17. package/lib/commonjs/NativeRNWhisper.js.map +1 -1
  18. package/lib/commonjs/index.js +82 -14
  19. package/lib/commonjs/index.js.map +1 -1
  20. package/lib/module/AudioSessionIos.js +83 -0
  21. package/lib/module/AudioSessionIos.js.map +1 -0
  22. package/lib/module/NativeRNWhisper.js.map +1 -1
  23. package/lib/module/index.js +77 -14
  24. package/lib/module/index.js.map +1 -1
  25. package/lib/typescript/AudioSessionIos.d.ts +54 -0
  26. package/lib/typescript/AudioSessionIos.d.ts.map +1 -0
  27. package/lib/typescript/NativeRNWhisper.d.ts +8 -0
  28. package/lib/typescript/NativeRNWhisper.d.ts.map +1 -1
  29. package/lib/typescript/index.d.ts +37 -2
  30. package/lib/typescript/index.d.ts.map +1 -1
  31. package/package.json +1 -1
  32. package/src/AudioSessionIos.ts +90 -0
  33. package/src/NativeRNWhisper.ts +11 -1
  34. package/src/index.ts +153 -26
package/README.md CHANGED
@@ -99,6 +99,34 @@ subscribe(evt => {
99
99
  })
100
100
  ```
101
101
 
102
+ On iOS, you may need to change the Audio Session so that it can be used alongside other audio playback, or to optimize the recording quality. So we have provided AudioSession utilities for you:
103
+
104
+ Option 1 - Use options in transcribeRealtime:
105
+ ```js
106
+ import { AudioSessionIos } from 'whisper.rn'
107
+
108
+ const { stop, subscribe } = await whisperContext.transcribeRealtime({
109
+ audioSessionOnStartIos: {
110
+ category: AudioSessionIos.Category.PlayAndRecord,
111
+ options: [AudioSessionIos.CategoryOption.MixWithOthers],
112
+ mode: AudioSessionIos.Mode.Default,
113
+ },
114
+ audioSessionOnStopIos: 'restore', // Or an AudioSessionSettingIos
115
+ })
116
+ ```
117
+
118
+ Option 2 - Manage the Audio Session anywhere:
119
+ ```js
120
+ import { AudioSessionIos } from 'whisper.rn'
121
+
122
+ await AudioSessionIos.setCategory(
123
+ AudioSessionIos.Category.PlayAndRecord, [AudioSessionIos.CategoryOption.MixWithOthers],
124
+ )
125
+ await AudioSessionIos.setMode(AudioSessionIos.Mode.Default)
126
+ await AudioSessionIos.setActive(true)
127
+ // Then you can start recording
128
+ ```
129
+
102
130
  In Android, you may need to request the microphone permission by [`PermissionAndroid`](https://reactnative.dev/docs/permissionsandroid).
103
131
 
104
132
  Please visit the [Documentation](docs/) for more details.
@@ -0,0 +1,119 @@
1
+ package com.rnwhisper;
2
+
3
+ import android.util.Log;
4
+
5
+ import java.util.ArrayList;
6
+ import java.lang.StringBuilder;
7
+ import java.io.IOException;
8
+ import java.io.FileReader;
9
+ import java.io.ByteArrayOutputStream;
10
+ import java.io.File;
11
+ import java.io.FileOutputStream;
12
+ import java.io.DataOutputStream;
13
+ import java.io.IOException;
14
+ import java.io.InputStream;
15
+ import java.nio.ByteBuffer;
16
+ import java.nio.ByteOrder;
17
+ import java.nio.ShortBuffer;
18
+
19
+ public class AudioUtils {
20
+ private static final String NAME = "RNWhisperAudioUtils";
21
+
22
+ private static final int SAMPLE_RATE = 16000;
23
+
24
+ private static byte[] shortToByte(short[] shortInts) {
25
+ int j = 0;
26
+ int length = shortInts.length;
27
+ byte[] byteData = new byte[length * 2];
28
+ for (int i = 0; i < length; i++) {
29
+ byteData[j++] = (byte) (shortInts[i] >>> 8);
30
+ byteData[j++] = (byte) (shortInts[i] >>> 0);
31
+ }
32
+ return byteData;
33
+ }
34
+
35
+ public static byte[] concatShortBuffers(ArrayList<short[]> buffers) {
36
+ int totalLength = 0;
37
+ for (int i = 0; i < buffers.size(); i++) {
38
+ totalLength += buffers.get(i).length;
39
+ }
40
+ byte[] result = new byte[totalLength * 2];
41
+ int offset = 0;
42
+ for (int i = 0; i < buffers.size(); i++) {
43
+ byte[] bytes = shortToByte(buffers.get(i));
44
+ System.arraycopy(bytes, 0, result, offset, bytes.length);
45
+ offset += bytes.length;
46
+ }
47
+
48
+ return result;
49
+ }
50
+
51
+ private static byte[] removeTrailingZeros(byte[] audioData) {
52
+ int i = audioData.length - 1;
53
+ while (i >= 0 && audioData[i] == 0) {
54
+ --i;
55
+ }
56
+ byte[] newData = new byte[i + 1];
57
+ System.arraycopy(audioData, 0, newData, 0, i + 1);
58
+ return newData;
59
+ }
60
+
61
+ public static void saveWavFile(byte[] rawData, String audioOutputFile) throws IOException {
62
+ Log.d(NAME, "call saveWavFile");
63
+ rawData = removeTrailingZeros(rawData);
64
+ DataOutputStream output = null;
65
+ try {
66
+ output = new DataOutputStream(new FileOutputStream(audioOutputFile));
67
+ // WAVE header
68
+ // see http://ccrma.stanford.edu/courses/422/projects/WaveFormat/
69
+ output.writeBytes("RIFF"); // chunk id
70
+ output.writeInt(Integer.reverseBytes(36 + rawData.length)); // chunk size
71
+ output.writeBytes("WAVE"); // format
72
+ output.writeBytes("fmt "); // subchunk 1 id
73
+ output.writeInt(Integer.reverseBytes(16)); // subchunk 1 size
74
+ output.writeShort(Short.reverseBytes((short) 1)); // audio format (1 = PCM)
75
+ output.writeShort(Short.reverseBytes((short) 1)); // number of channels
76
+ output.writeInt(Integer.reverseBytes(SAMPLE_RATE)); // sample rate
77
+ output.writeInt(Integer.reverseBytes(SAMPLE_RATE * 2)); // byte rate
78
+ output.writeShort(Short.reverseBytes((short) 2)); // block align
79
+ output.writeShort(Short.reverseBytes((short) 16)); // bits per sample
80
+ output.writeBytes("data"); // subchunk 2 id
81
+ output.writeInt(Integer.reverseBytes(rawData.length)); // subchunk 2 size
82
+ // Audio data (conversion big endian -> little endian)
83
+ short[] shorts = new short[rawData.length / 2];
84
+ ByteBuffer.wrap(rawData).order(ByteOrder.LITTLE_ENDIAN).asShortBuffer().get(shorts);
85
+ ByteBuffer bytes = ByteBuffer.allocate(shorts.length * 2);
86
+ for (short s : shorts) {
87
+ bytes.putShort(s);
88
+ }
89
+ Log.d(NAME, "writing audio file: " + audioOutputFile);
90
+ output.write(bytes.array());
91
+ } finally {
92
+ if (output != null) {
93
+ output.close();
94
+ }
95
+ }
96
+ }
97
+
98
+ public static float[] decodeWaveFile(InputStream inputStream) throws IOException {
99
+ ByteArrayOutputStream baos = new ByteArrayOutputStream();
100
+ byte[] buffer = new byte[1024];
101
+ int bytesRead;
102
+ while ((bytesRead = inputStream.read(buffer)) != -1) {
103
+ baos.write(buffer, 0, bytesRead);
104
+ }
105
+ ByteBuffer byteBuffer = ByteBuffer.wrap(baos.toByteArray());
106
+ byteBuffer.order(ByteOrder.LITTLE_ENDIAN);
107
+ byteBuffer.position(44);
108
+ ShortBuffer shortBuffer = byteBuffer.asShortBuffer();
109
+ short[] shortArray = new short[shortBuffer.limit()];
110
+ shortBuffer.get(shortArray);
111
+ float[] floatArray = new float[shortArray.length];
112
+ for (int i = 0; i < shortArray.length; i++) {
113
+ floatArray[i] = ((float) shortArray[i]) / 32767.0f;
114
+ floatArray[i] = Math.max(floatArray[i], -1f);
115
+ floatArray[i] = Math.min(floatArray[i], 1f);
116
+ }
117
+ return floatArray;
118
+ }
119
+ }
@@ -14,24 +14,15 @@ import android.media.AudioFormat;
14
14
  import android.media.AudioRecord;
15
15
  import android.media.MediaRecorder.AudioSource;
16
16
 
17
- import java.util.Random;
18
17
  import java.util.ArrayList;
19
18
  import java.lang.StringBuilder;
20
- import java.io.File;
21
19
  import java.io.BufferedReader;
22
20
  import java.io.IOException;
23
21
  import java.io.FileReader;
24
- import java.io.ByteArrayOutputStream;
25
22
  import java.io.File;
26
- import java.io.FileInputStream;
27
- import java.io.FileOutputStream;
28
- import java.io.DataOutputStream;
29
23
  import java.io.IOException;
30
24
  import java.io.InputStream;
31
25
  import java.io.PushbackInputStream;
32
- import java.nio.ByteBuffer;
33
- import java.nio.ByteOrder;
34
- import java.nio.ShortBuffer;
35
26
 
36
27
  public class WhisperContext {
37
28
  public static final String NAME = "RNWhisperContext";
@@ -88,80 +79,6 @@ public class WhisperContext {
88
79
  fullHandler = null;
89
80
  }
90
81
 
91
- public byte[] shortToByte(short[] shortInts) {
92
- int j = 0;
93
- int length = shortInts.length;
94
- byte[] byteData = new byte[length * 2];
95
- for (int i = 0; i < length; i++) {
96
- byteData[j++] = (byte) (shortInts[i] >>> 8);
97
- byteData[j++] = (byte) (shortInts[i] >>> 0);
98
- }
99
- return byteData;
100
- }
101
-
102
- public byte[] concatShortBuffers(ArrayList<short[]> buffers) {
103
- int totalLength = 0;
104
- for (int i = 0; i < buffers.size(); i++) {
105
- totalLength += buffers.get(i).length;
106
- }
107
- byte[] result = new byte[totalLength * 2];
108
- int offset = 0;
109
- for (int i = 0; i < buffers.size(); i++) {
110
- byte[] bytes = shortToByte(buffers.get(i));
111
- System.arraycopy(bytes, 0, result, offset, bytes.length);
112
- offset += bytes.length;
113
- }
114
-
115
- return result;
116
- }
117
-
118
- public byte[] removeTrailingZeros(byte[] audioData) {
119
- int i = audioData.length - 1;
120
- while (i >= 0 && audioData[i] == 0) {
121
- --i;
122
- }
123
- byte[] newData = new byte[i + 1];
124
- System.arraycopy(audioData, 0, newData, 0, i + 1);
125
- return newData;
126
- }
127
-
128
- private void saveWavFile(byte[] rawData, String audioOutputFile) throws IOException {
129
- Log.d(NAME, "call saveWavFile");
130
- rawData = removeTrailingZeros(rawData);
131
- DataOutputStream output = null;
132
- try {
133
- output = new DataOutputStream(new FileOutputStream(audioOutputFile));
134
- // WAVE header
135
- // see http://ccrma.stanford.edu/courses/422/projects/WaveFormat/
136
- output.writeBytes("RIFF"); // chunk id
137
- output.writeInt(Integer.reverseBytes(36 + rawData.length)); // chunk size
138
- output.writeBytes("WAVE"); // format
139
- output.writeBytes("fmt "); // subchunk 1 id
140
- output.writeInt(Integer.reverseBytes(16)); // subchunk 1 size
141
- output.writeShort(Short.reverseBytes((short) 1)); // audio format (1 = PCM)
142
- output.writeShort(Short.reverseBytes((short) 1)); // number of channels
143
- output.writeInt(Integer.reverseBytes(SAMPLE_RATE)); // sample rate
144
- output.writeInt(Integer.reverseBytes(SAMPLE_RATE * 2)); // byte rate
145
- output.writeShort(Short.reverseBytes((short) 2)); // block align
146
- output.writeShort(Short.reverseBytes((short) 16)); // bits per sample
147
- output.writeBytes("data"); // subchunk 2 id
148
- output.writeInt(Integer.reverseBytes(rawData.length)); // subchunk 2 size
149
- // Audio data (conversion big endian -> little endian)
150
- short[] shorts = new short[rawData.length / 2];
151
- ByteBuffer.wrap(rawData).order(ByteOrder.LITTLE_ENDIAN).asShortBuffer().get(shorts);
152
- ByteBuffer bytes = ByteBuffer.allocate(shorts.length * 2);
153
- for (short s : shorts) {
154
- bytes.putShort(s);
155
- }
156
- Log.d(NAME, "writing audio file: " + audioOutputFile);
157
- output.write(bytes.array());
158
- } finally {
159
- if (output != null) {
160
- output.close();
161
- }
162
- }
163
- }
164
-
165
82
  private boolean vad(ReadableMap options, short[] shortBuffer, int nSamples, int n) {
166
83
  boolean isSpeech = true;
167
84
  if (!isTranscribing && options.hasKey("useVad") && options.getBoolean("useVad")) {
@@ -295,7 +212,7 @@ public class WhisperContext {
295
212
  }
296
213
  // TODO: Append in real time so we don't need to keep all slices & also reduce memory usage
297
214
  Log.d(NAME, "Begin saving wav file to " + audioOutputPath);
298
- saveWavFile(concatShortBuffers(shortBufferSlices), audioOutputPath);
215
+ AudioUtils.saveWavFile(AudioUtils.concatShortBuffers(shortBufferSlices), audioOutputPath);
299
216
  if (!isTranscribing) {
300
217
  emitTranscribeEvent("@RNWhisper_onRealtimeTranscribeEnd", Arguments.createMap());
301
218
  }
@@ -346,7 +263,7 @@ public class WhisperContext {
346
263
  payload.putInt("sliceIndex", transcribeSliceIndex);
347
264
 
348
265
  if (code == 0) {
349
- payload.putMap("data", getTextSegments());
266
+ payload.putMap("data", getTextSegments(0, getTextSegmentCount(context)));
350
267
  } else {
351
268
  payload.putString("error", "Transcribe failed with code " + code);
352
269
  }
@@ -406,16 +323,41 @@ public class WhisperContext {
406
323
  eventEmitter.emit("@RNWhisper_onTranscribeProgress", event);
407
324
  }
408
325
 
409
- private static class ProgressCallback {
326
+ private void emitNewSegments(WritableMap result) {
327
+ WritableMap event = Arguments.createMap();
328
+ event.putInt("contextId", WhisperContext.this.id);
329
+ event.putInt("jobId", jobId);
330
+ event.putMap("result", result);
331
+ eventEmitter.emit("@RNWhisper_onTranscribeNewSegments", event);
332
+ }
333
+
334
+ private static class Callback {
410
335
  WhisperContext context;
336
+ boolean emitProgressNeeded = false;
337
+ boolean emitNewSegmentsNeeded = false;
338
+ int totalNNew = 0;
411
339
 
412
- public ProgressCallback(WhisperContext context) {
340
+ public Callback(WhisperContext context, boolean emitProgressNeeded, boolean emitNewSegmentsNeeded) {
413
341
  this.context = context;
342
+ this.emitProgressNeeded = emitProgressNeeded;
343
+ this.emitNewSegmentsNeeded = emitNewSegmentsNeeded;
414
344
  }
415
345
 
416
346
  void onProgress(int progress) {
347
+ if (!emitProgressNeeded) return;
417
348
  context.emitProgress(progress);
418
349
  }
350
+
351
+ void onNewSegments(int nNew) {
352
+ Log.d(NAME, "onNewSegments: " + nNew);
353
+ totalNNew += nNew;
354
+ if (!emitNewSegmentsNeeded) return;
355
+
356
+ WritableMap result = context.getTextSegments(totalNNew - nNew, totalNNew);
357
+ result.putInt("nNew", nNew);
358
+ result.putInt("totalNNew", totalNNew);
359
+ context.emitNewSegments(result);
360
+ }
419
361
  }
420
362
 
421
363
  public WritableMap transcribeInputStream(int jobId, InputStream inputStream, ReadableMap options) throws IOException, Exception {
@@ -426,19 +368,21 @@ public class WhisperContext {
426
368
 
427
369
  this.jobId = jobId;
428
370
  isTranscribing = true;
429
- float[] audioData = decodeWaveFile(inputStream);
371
+ float[] audioData = AudioUtils.decodeWaveFile(inputStream);
430
372
  int code = full(jobId, options, audioData, audioData.length);
431
373
  isTranscribing = false;
432
374
  this.jobId = -1;
433
375
  if (code != 0) {
434
376
  throw new Exception("Failed to transcribe the file. Code: " + code);
435
377
  }
436
- WritableMap result = getTextSegments();
378
+ WritableMap result = getTextSegments(0, getTextSegmentCount(context));
437
379
  result.putBoolean("isAborted", isStoppedByAction);
438
380
  return result;
439
381
  }
440
382
 
441
383
  private int full(int jobId, ReadableMap options, float[] audioData, int audioDataLen) {
384
+ boolean hasProgressCallback = options.hasKey("onProgress") && options.getBoolean("onProgress");
385
+ boolean hasNewSegmentsCallback = options.hasKey("onNewSegments") && options.getBoolean("onNewSegments");
442
386
  return fullTranscribe(
443
387
  jobId,
444
388
  context,
@@ -478,13 +422,12 @@ public class WhisperContext {
478
422
  options.hasKey("language") ? options.getString("language") : "auto",
479
423
  // jstring prompt
480
424
  options.hasKey("prompt") ? options.getString("prompt") : null,
481
- // ProgressCallback progressCallback
482
- options.hasKey("onProgress") && options.getBoolean("onProgress") ? new ProgressCallback(this) : null
425
+ // Callback callback
426
+ hasProgressCallback || hasNewSegmentsCallback ? new Callback(this, hasProgressCallback, hasNewSegmentsCallback) : null
483
427
  );
484
428
  }
485
429
 
486
- private WritableMap getTextSegments() {
487
- Integer count = getTextSegmentCount(context);
430
+ private WritableMap getTextSegments(int start, int count) {
488
431
  StringBuilder builder = new StringBuilder();
489
432
 
490
433
  WritableMap data = Arguments.createMap();
@@ -537,28 +480,6 @@ public class WhisperContext {
537
480
  freeContext(context);
538
481
  }
539
482
 
540
- public static float[] decodeWaveFile(InputStream inputStream) throws IOException {
541
- ByteArrayOutputStream baos = new ByteArrayOutputStream();
542
- byte[] buffer = new byte[1024];
543
- int bytesRead;
544
- while ((bytesRead = inputStream.read(buffer)) != -1) {
545
- baos.write(buffer, 0, bytesRead);
546
- }
547
- ByteBuffer byteBuffer = ByteBuffer.wrap(baos.toByteArray());
548
- byteBuffer.order(ByteOrder.LITTLE_ENDIAN);
549
- byteBuffer.position(44);
550
- ShortBuffer shortBuffer = byteBuffer.asShortBuffer();
551
- short[] shortArray = new short[shortBuffer.limit()];
552
- shortBuffer.get(shortArray);
553
- float[] floatArray = new float[shortArray.length];
554
- for (int i = 0; i < shortArray.length; i++) {
555
- floatArray[i] = ((float) shortArray[i]) / 32767.0f;
556
- floatArray[i] = Math.max(floatArray[i], -1f);
557
- floatArray[i] = Math.min(floatArray[i], 1f);
558
- }
559
- return floatArray;
560
- }
561
-
562
483
  static {
563
484
  Log.d(NAME, "Primary ABI: " + Build.SUPPORTED_ABIS[0]);
564
485
  boolean loadVfpv4 = false;
@@ -647,7 +568,7 @@ public class WhisperContext {
647
568
  boolean translate,
648
569
  String language,
649
570
  String prompt,
650
- ProgressCallback progressCallback
571
+ Callback Callback
651
572
  );
652
573
  protected static native void abortTranscribe(int jobId);
653
574
  protected static native void abortAllTranscribe();
@@ -206,9 +206,9 @@ Java_com_rnwhisper_WhisperContext_vadSimple(
206
206
  return is_speech;
207
207
  }
208
208
 
209
- struct progress_callback_context {
209
+ struct callback_context {
210
210
  JNIEnv *env;
211
- jobject progress_callback_instance;
211
+ jobject callback_instance;
212
212
  };
213
213
 
214
214
  JNIEXPORT jint JNICALL
@@ -234,7 +234,7 @@ Java_com_rnwhisper_WhisperContext_fullTranscribe(
234
234
  jboolean translate,
235
235
  jstring language,
236
236
  jstring prompt,
237
- jobject progress_callback_instance
237
+ jobject callback_instance
238
238
  ) {
239
239
  UNUSED(thiz);
240
240
  struct whisper_context *context = reinterpret_cast<struct whisper_context *>(context_ptr);
@@ -302,19 +302,30 @@ Java_com_rnwhisper_WhisperContext_fullTranscribe(
302
302
  };
303
303
  params.encoder_begin_callback_user_data = rn_whisper_assign_abort_map(job_id);
304
304
 
305
- if (progress_callback_instance != nullptr) {
305
+ if (callback_instance != nullptr) {
306
+ callback_context *cb_ctx = new callback_context;
307
+ cb_ctx->env = env;
308
+ cb_ctx->callback_instance = env->NewGlobalRef(callback_instance);
309
+
306
310
  params.progress_callback = [](struct whisper_context * /*ctx*/, struct whisper_state * /*state*/, int progress, void * user_data) {
307
- progress_callback_context *cb_ctx = (progress_callback_context *)user_data;
311
+ callback_context *cb_ctx = (callback_context *)user_data;
308
312
  JNIEnv *env = cb_ctx->env;
309
- jobject progress_callback_instance = cb_ctx->progress_callback_instance;
310
- jclass progress_callback_class = env->GetObjectClass(progress_callback_instance);
311
- jmethodID onProgress = env->GetMethodID(progress_callback_class, "onProgress", "(I)V");
312
- env->CallVoidMethod(progress_callback_instance, onProgress, progress);
313
+ jobject callback_instance = cb_ctx->callback_instance;
314
+ jclass callback_class = env->GetObjectClass(callback_instance);
315
+ jmethodID onProgress = env->GetMethodID(callback_class, "onProgress", "(I)V");
316
+ env->CallVoidMethod(callback_instance, onProgress, progress);
313
317
  };
314
- progress_callback_context *cb_ctx = new progress_callback_context;
315
- cb_ctx->env = env;
316
- cb_ctx->progress_callback_instance = env->NewGlobalRef(progress_callback_instance);
317
318
  params.progress_callback_user_data = cb_ctx;
319
+
320
+ params.new_segment_callback = [](struct whisper_context * /*ctx*/, struct whisper_state * /*state*/, int n_new, void * user_data) {
321
+ callback_context *cb_ctx = (callback_context *)user_data;
322
+ JNIEnv *env = cb_ctx->env;
323
+ jobject callback_instance = cb_ctx->callback_instance;
324
+ jclass callback_class = env->GetObjectClass(callback_instance);
325
+ jmethodID onNewSegments = env->GetMethodID(callback_class, "onNewSegments", "(I)V");
326
+ env->CallVoidMethod(callback_instance, onNewSegments, n_new);
327
+ };
328
+ params.new_segment_callback_user_data = cb_ctx;
318
329
  }
319
330
 
320
331
  LOGI("About to reset timings");
@@ -6,6 +6,7 @@ import com.facebook.react.bridge.Promise;
6
6
  import com.facebook.react.bridge.ReactApplicationContext;
7
7
  import com.facebook.react.bridge.ReactMethod;
8
8
  import com.facebook.react.bridge.ReadableMap;
9
+ import com.facebook.react.bridge.ReadableArray;
9
10
  import com.facebook.react.module.annotations.ReactModule;
10
11
 
11
12
  import java.util.HashMap;
@@ -65,4 +66,29 @@ public class RNWhisperModule extends NativeRNWhisperSpec {
65
66
  public void releaseAllContexts(Promise promise) {
66
67
  rnwhisper.releaseAllContexts(promise);
67
68
  }
69
+
70
+ /*
71
+ * iOS Specific methods, left here for make the turbo module happy:
72
+ */
73
+
74
+ @ReactMethod
75
+ public void getAudioSessionCurrentCategory(Promise promise) {
76
+ promise.resolve(null);
77
+ }
78
+ @ReactMethod
79
+ public void getAudioSessionCurrentMode(Promise promise) {
80
+ promise.resolve(null);
81
+ }
82
+ @ReactMethod
83
+ public void setAudioSessionCategory(String category, ReadableArray options, Promise promise) {
84
+ promise.resolve(null);
85
+ }
86
+ @ReactMethod
87
+ public void setAudioSessionMode(String mode, Promise promise) {
88
+ promise.resolve(null);
89
+ }
90
+ @ReactMethod
91
+ public void setAudioSessionActive(boolean active, Promise promise) {
92
+ promise.resolve(null);
93
+ }
68
94
  }
package/ios/RNWhisper.mm CHANGED
@@ -1,6 +1,8 @@
1
1
  #import "RNWhisper.h"
2
2
  #import "RNWhisperContext.h"
3
3
  #import "RNWhisperDownloader.h"
4
+ #import "RNWhisperAudioUtils.h"
5
+ #import "RNWhisperAudioSessionUtils.h"
4
6
  #include <stdlib.h>
5
7
  #include <string>
6
8
 
@@ -87,6 +89,7 @@ RCT_REMAP_METHOD(initContext,
87
89
  - (NSArray *)supportedEvents {
88
90
  return@[
89
91
  @"@RNWhisper_onTranscribeProgress",
92
+ @"@RNWhisper_onTranscribeNewSegments",
90
93
  @"@RNWhisper_onRealtimeTranscribe",
91
94
  @"@RNWhisper_onRealtimeTranscribeEnd",
92
95
  ];
@@ -121,7 +124,7 @@ RCT_REMAP_METHOD(transcribeFile,
121
124
  }
122
125
 
123
126
  int count = 0;
124
- float *waveFile = [self decodeWaveFile:path count:&count];
127
+ float *waveFile = [RNWhisperAudioUtils decodeWaveFile:path count:&count];
125
128
  if (waveFile == nil) {
126
129
  reject(@"whisper_error", @"Invalid file", nil);
127
130
  return;
@@ -144,6 +147,20 @@ RCT_REMAP_METHOD(transcribeFile,
144
147
  ];
145
148
  });
146
149
  }
150
+ onNewSegments: ^(NSDictionary *result) {
151
+ if (rn_whisper_transcribe_is_aborted(jobId)) {
152
+ return;
153
+ }
154
+ dispatch_async(dispatch_get_main_queue(), ^{
155
+ [self sendEventWithName:@"@RNWhisper_onTranscribeNewSegments"
156
+ body:@{
157
+ @"contextId": [NSNumber numberWithInt:contextId],
158
+ @"jobId": [NSNumber numberWithInt:jobId],
159
+ @"result": result
160
+ }
161
+ ];
162
+ });
163
+ }
147
164
  onEnd: ^(int code) {
148
165
  if (code != 0) {
149
166
  free(waveFile);
@@ -242,27 +259,6 @@ RCT_REMAP_METHOD(releaseAllContexts,
242
259
  resolve(nil);
243
260
  }
244
261
 
245
- - (float *)decodeWaveFile:(NSString*)filePath count:(int *)count {
246
- NSURL *url = [NSURL fileURLWithPath:filePath];
247
- NSData *fileData = [NSData dataWithContentsOfURL:url];
248
- if (fileData == nil) {
249
- return nil;
250
- }
251
- NSMutableData *waveData = [[NSMutableData alloc] init];
252
- [waveData appendData:[fileData subdataWithRange:NSMakeRange(44, [fileData length]-44)]];
253
- const short *shortArray = (const short *)[waveData bytes];
254
- int shortCount = (int) ([waveData length] / sizeof(short));
255
- float *floatArray = (float *) malloc(shortCount * sizeof(float));
256
- for (NSInteger i = 0; i < shortCount; i++) {
257
- float floatValue = ((float)shortArray[i]) / 32767.0;
258
- floatValue = MAX(floatValue, -1.0);
259
- floatValue = MIN(floatValue, 1.0);
260
- floatArray[i] = floatValue;
261
- }
262
- *count = shortCount;
263
- return floatArray;
264
- }
265
-
266
262
  - (void)invalidate {
267
263
  [super invalidate];
268
264
 
@@ -283,6 +279,69 @@ RCT_REMAP_METHOD(releaseAllContexts,
283
279
  [RNWhisperDownloader clearCache];
284
280
  }
285
281
 
282
+ // MARK: - AudioSessionUtils
283
+
284
+ RCT_EXPORT_METHOD(getAudioSessionCurrentCategory:(RCTPromiseResolveBlock)resolve
285
+ withRejecter:(RCTPromiseRejectBlock)reject)
286
+ {
287
+ NSString *category = [RNWhisperAudioSessionUtils getCurrentCategory];
288
+ NSArray *options = [RNWhisperAudioSessionUtils getCurrentOptions];
289
+ resolve(@{
290
+ @"category": category,
291
+ @"options": options
292
+ });
293
+ }
294
+
295
+ RCT_EXPORT_METHOD(getAudioSessionCurrentMode:(RCTPromiseResolveBlock)resolve
296
+ withRejecter:(RCTPromiseRejectBlock)reject)
297
+ {
298
+ NSString *mode = [RNWhisperAudioSessionUtils getCurrentMode];
299
+ resolve(mode);
300
+ }
301
+
302
+ RCT_REMAP_METHOD(setAudioSessionCategory,
303
+ withCategory:(NSString *)category
304
+ withOptions:(NSArray *)options
305
+ withResolver:(RCTPromiseResolveBlock)resolve
306
+ withRejecter:(RCTPromiseRejectBlock)reject)
307
+ {
308
+ NSError *error = nil;
309
+ [RNWhisperAudioSessionUtils setCategory:category options:options error:&error];
310
+ if (error != nil) {
311
+ reject(@"whisper_error", [NSString stringWithFormat:@"Failed to set category. Error: %@", error], nil);
312
+ return;
313
+ }
314
+ resolve(nil);
315
+ }
316
+
317
+ RCT_REMAP_METHOD(setAudioSessionMode,
318
+ withMode:(NSString *)mode
319
+ withResolver:(RCTPromiseResolveBlock)resolve
320
+ withRejecter:(RCTPromiseRejectBlock)reject)
321
+ {
322
+ NSError *error = nil;
323
+ [RNWhisperAudioSessionUtils setMode:mode error:&error];
324
+ if (error != nil) {
325
+ reject(@"whisper_error", [NSString stringWithFormat:@"Failed to set mode. Error: %@", error], nil);
326
+ return;
327
+ }
328
+ resolve(nil);
329
+ }
330
+
331
+ RCT_REMAP_METHOD(setAudioSessionActive,
332
+ withActive:(BOOL)active
333
+ withResolver:(RCTPromiseResolveBlock)resolve
334
+ withRejecter:(RCTPromiseRejectBlock)reject)
335
+ {
336
+ NSError *error = nil;
337
+ [RNWhisperAudioSessionUtils setActive:active error:&error];
338
+ if (error != nil) {
339
+ reject(@"whisper_error", [NSString stringWithFormat:@"Failed to set active. Error: %@", error], nil);
340
+ return;
341
+ }
342
+ resolve(nil);
343
+ }
344
+
286
345
  #ifdef RCT_NEW_ARCH_ENABLED
287
346
  - (std::shared_ptr<facebook::react::TurboModule>)getTurboModule:
288
347
  (const facebook::react::ObjCTurboModule::InitParams &)params
@@ -8,6 +8,7 @@
8
8
 
9
9
  /* Begin PBXBuildFile section */
10
10
  5E555C0D2413F4C50049A1A2 /* RNWhisper.mm in Sources */ = {isa = PBXBuildFile; fileRef = B3E7B5891CC2AC0600A0062D /* RNWhisper.mm */; };
11
+ 7F458E922AC7DC74007045F6 /* RNWhisperAudioSessionUtils.m in Sources */ = {isa = PBXBuildFile; fileRef = 7F458E912AC7DC74007045F6 /* RNWhisperAudioSessionUtils.m */; };
11
12
  7FE0BBA12ABE6C7B0049B4E4 /* RNWhisperDownloader.m in Sources */ = {isa = PBXBuildFile; fileRef = 7FE0BB9B2ABE6C7B0049B4E4 /* RNWhisperDownloader.m */; };
12
13
  7FE0BBA22ABE6C7B0049B4E4 /* RNWhisperAudioUtils.m in Sources */ = {isa = PBXBuildFile; fileRef = 7FE0BB9C2ABE6C7B0049B4E4 /* RNWhisperAudioUtils.m */; };
13
14
  7FE0BBA32ABE6C7B0049B4E4 /* RNWhisperContext.mm in Sources */ = {isa = PBXBuildFile; fileRef = 7FE0BBA02ABE6C7B0049B4E4 /* RNWhisperContext.mm */; };
@@ -27,6 +28,8 @@
27
28
 
28
29
  /* Begin PBXFileReference section */
29
30
  134814201AA4EA6300B7C361 /* libRNWhisper.a */ = {isa = PBXFileReference; explicitFileType = archive.ar; includeInIndex = 0; path = libRNWhisper.a; sourceTree = BUILT_PRODUCTS_DIR; };
31
+ 7F458E902AC7DC74007045F6 /* RNWhisperAudioSessionUtils.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = RNWhisperAudioSessionUtils.h; sourceTree = "<group>"; };
32
+ 7F458E912AC7DC74007045F6 /* RNWhisperAudioSessionUtils.m */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.objc; path = RNWhisperAudioSessionUtils.m; sourceTree = "<group>"; };
30
33
  7FE0BB9A2ABE6C7B0049B4E4 /* RNWhisper.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = RNWhisper.h; sourceTree = "<group>"; };
31
34
  7FE0BB9B2ABE6C7B0049B4E4 /* RNWhisperDownloader.m */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.objc; path = RNWhisperDownloader.m; sourceTree = "<group>"; };
32
35
  7FE0BB9C2ABE6C7B0049B4E4 /* RNWhisperAudioUtils.m */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.objc; path = RNWhisperAudioUtils.m; sourceTree = "<group>"; };
@@ -59,6 +62,8 @@
59
62
  58B511D21A9E6C8500147676 = {
60
63
  isa = PBXGroup;
61
64
  children = (
65
+ 7F458E902AC7DC74007045F6 /* RNWhisperAudioSessionUtils.h */,
66
+ 7F458E912AC7DC74007045F6 /* RNWhisperAudioSessionUtils.m */,
62
67
  7FE0BB9F2ABE6C7B0049B4E4 /* RNWhisperAudioUtils.h */,
63
68
  7FE0BB9C2ABE6C7B0049B4E4 /* RNWhisperAudioUtils.m */,
64
69
  7FE0BB9A2ABE6C7B0049B4E4 /* RNWhisper.h */,
@@ -132,6 +137,7 @@
132
137
  7FE0BBA22ABE6C7B0049B4E4 /* RNWhisperAudioUtils.m in Sources */,
133
138
  7FE0BBA32ABE6C7B0049B4E4 /* RNWhisperContext.mm in Sources */,
134
139
  7FE0BBA12ABE6C7B0049B4E4 /* RNWhisperDownloader.m in Sources */,
140
+ 7F458E922AC7DC74007045F6 /* RNWhisperAudioSessionUtils.m in Sources */,
135
141
  );
136
142
  runOnlyForDeploymentPostprocessing = 0;
137
143
  };