npm - whisper.rn - Versions diffs - 0.4.0-rc.4 → 0.4.0-rc.6 - Mend

whisper.rn 0.4.0-rc.4 → 0.4.0-rc.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (49)

package/README.md +6 -6
package/android/build.gradle +4 -0
package/android/src/main/CMakeLists.txt +5 -0
package/android/src/main/java/com/rnwhisper/AudioUtils.java +0 -80
package/android/src/main/java/com/rnwhisper/WhisperContext.java +57 -134
package/android/src/main/jni-utils.h +76 -0
package/android/src/main/jni.cpp +188 -112
package/cpp/README.md +1 -1
package/cpp/coreml/whisper-encoder-impl.h +1 -1
package/cpp/coreml/whisper-encoder.h +4 -0
package/cpp/coreml/whisper-encoder.mm +4 -2
package/cpp/ggml-alloc.c +55 -19
package/cpp/ggml-alloc.h +8 -1
package/cpp/ggml-backend-impl.h +46 -21
package/cpp/ggml-backend.c +563 -156
package/cpp/ggml-backend.h +62 -17
package/cpp/ggml-impl.h +1 -1
package/cpp/ggml-metal-whisper.metal +2444 -359
package/cpp/ggml-metal.h +7 -1
package/cpp/ggml-metal.m +1105 -197
package/cpp/ggml-quants.c +66 -61
package/cpp/ggml-quants.h +40 -40
package/cpp/ggml.c +1040 -1590
package/cpp/ggml.h +109 -30
package/cpp/rn-audioutils.cpp +68 -0
package/cpp/rn-audioutils.h +14 -0
package/cpp/rn-whisper-log.h +11 -0
package/cpp/rn-whisper.cpp +143 -59
package/cpp/rn-whisper.h +48 -15
package/cpp/whisper.cpp +1635 -928
package/cpp/whisper.h +55 -10
package/ios/RNWhisper.mm +7 -7
package/ios/RNWhisperAudioUtils.h +0 -2
package/ios/RNWhisperAudioUtils.m +0 -56
package/ios/RNWhisperContext.h +3 -11
package/ios/RNWhisperContext.mm +68 -137
package/lib/commonjs/index.js.map +1 -1
package/lib/commonjs/version.json +1 -1
package/lib/module/index.js.map +1 -1
package/lib/module/version.json +1 -1
package/lib/typescript/index.d.ts +5 -0
package/lib/typescript/index.d.ts.map +1 -1
package/package.json +6 -5
package/src/index.ts +5 -0
package/src/version.json +1 -1
package/ios/RNWhisper.xcodeproj/project.xcworkspace/contents.xcworkspacedata +0 -4
package/ios/RNWhisper.xcodeproj/project.xcworkspace/xcshareddata/IDEWorkspaceChecks.plist +0 -8
package/ios/RNWhisper.xcodeproj/project.xcworkspace/xcuserdata/jhen.xcuserdatad/UserInterfaceState.xcuserstate +0 -0
package/ios/RNWhisper.xcodeproj/xcuserdata/jhen.xcuserdatad/xcschemes/xcschememanagement.plist +0 -19

package/README.md CHANGED Viewed

@@ -25,19 +25,19 @@ npm install whisper.rn
 Please re-run `npx pod-install` again.
-#### Android
 If you want to use `medium` or `large` model, the [Extended Virtual Addressing](https://developer.apple.com/documentation/bundleresources/entitlements/com_apple_developer_kernel_extended-virtual-addressing) capability is recommended to enable on iOS project.
-For Android, it's recommended to use `ndkVersion = "24.0.8215888"` (or above) in your root project build configuration for Apple Silicon Macs. Otherwise please follow this trobleshooting [issue](./TROUBLESHOOTING.md#android-got-build-error-unknown-host-cpu-architecture-arm64-on-apple-silicon-macs).
+#### Android
-Don't forget to add proguard rule if it's enabled in project (android/app/proguard-rules.pro):
+Add proguard rule if it's enabled in project (android/app/proguard-rules.pro):
 ```proguard
 # whisper.rn
 -keep class com.rnwhisper.** { *; }
 ```
+For build, it's recommended to use `ndkVersion = "24.0.8215888"` (or above) in your root project build configuration for Apple Silicon Macs. Otherwise please follow this troubleshooting [issue](./TROUBLESHOOTING.md#android-got-build-error-unknown-host-cpu-architecture-arm64-on-apple-silicon-macs).
 #### Expo
 You will need to prebuild the project before using it. See [Expo guide](https://docs.expo.io/guides/using-libraries/#using-a-library-in-a-expo-project) for more details.
@@ -91,7 +91,7 @@ subscribe(evt => {
   console.log(
     `Realtime transcribing: ${isCapturing ? 'ON' : 'OFF'}\n` +
       // The inference text result from audio record:
-      `Result: ${data.result}\n\n` +
+      `Result: ${data.result}\n\n` +
       `Process time: ${processTime}ms\n` +
       `Recording time: ${recordingTime}ms`,
   )
@@ -220,7 +220,7 @@ In real world, we recommended to split the asset imports into another platform s
 The example app provide a simple UI for testing the functions.
-Used Whisper model: `tiny.en` in https://huggingface.co/ggerganov/whisper.cpp
+Used Whisper model: `tiny.en` in https://huggingface.co/ggerganov/whisper.cpp
 Sample file: `jfk.wav` in https://github.com/ggerganov/whisper.cpp/tree/master/samples
 Please follow the [Development Workflow section of contributing guide](./CONTRIBUTING.md#development-workflow) to run the example app.

package/android/build.gradle CHANGED Viewed

@@ -36,6 +36,10 @@ def reactNativeArchitectures() {
 }
 android {
+  def agpVersion = com.android.Version.ANDROID_GRADLE_PLUGIN_VERSION
+  if (agpVersion.tokenize('.')[0].toInteger() >= 7) {
+    namespace "com.rnwhisper"
+  }
   ndkVersion getExtOrDefault("ndkVersion")
   compileSdkVersion getExtOrIntegerDefault("compileSdkVersion")

package/android/src/main/CMakeLists.txt CHANGED Viewed

@@ -12,6 +12,7 @@ set(
     ${RNWHISPER_LIB_DIR}/ggml-backend.c
     ${RNWHISPER_LIB_DIR}/ggml-quants.c
     ${RNWHISPER_LIB_DIR}/whisper.cpp
+    ${RNWHISPER_LIB_DIR}/rn-audioutils.cpp
     ${RNWHISPER_LIB_DIR}/rn-whisper.cpp
     ${CMAKE_SOURCE_DIR}/jni.cpp
 )
@@ -33,6 +34,10 @@ function(build_library target_name)
         target_compile_options(${target_name} PRIVATE -mfpu=neon-vfpv4)
     endif ()
+    if (${CMAKE_BUILD_TYPE} STREQUAL "Debug")
+        target_compile_options(${target_name} PRIVATE -DRNWHISPER_ANDROID_ENABLE_LOGGING)
+    endif ()
     # NOTE: If you want to debug the native code, you can uncomment if and endif
     # if (NOT ${CMAKE_BUILD_TYPE} STREQUAL "Debug")

package/android/src/main/java/com/rnwhisper/AudioUtils.java CHANGED Viewed

@@ -2,14 +2,10 @@ package com.rnwhisper;
 import android.util.Log;
-import java.util.ArrayList;
-import java.lang.StringBuilder;
 import java.io.IOException;
 import java.io.FileReader;
 import java.io.ByteArrayOutputStream;
 import java.io.File;
-import java.io.FileOutputStream;
-import java.io.DataOutputStream;
 import java.io.IOException;
 import java.io.InputStream;
 import java.nio.ByteBuffer;
@@ -19,82 +15,6 @@ import java.nio.ShortBuffer;
 public class AudioUtils {
   private static final String NAME = "RNWhisperAudioUtils";
-  private static final int SAMPLE_RATE = 16000;
-  private static byte[] shortToByte(short[] shortInts) {
-    int j = 0;
-    int length = shortInts.length;
-    byte[] byteData = new byte[length * 2];
-    for (int i = 0; i < length; i++) {
-      byteData[j++] = (byte) (shortInts[i] >>> 8);
-      byteData[j++] = (byte) (shortInts[i] >>> 0);
-    }
-    return byteData;
-  }
-  public static byte[] concatShortBuffers(ArrayList<short[]> buffers) {
-    int totalLength = 0;
-    for (int i = 0; i < buffers.size(); i++) {
-      totalLength += buffers.get(i).length;
-    }
-    byte[] result = new byte[totalLength * 2];
-    int offset = 0;
-    for (int i = 0; i < buffers.size(); i++) {
-      byte[] bytes = shortToByte(buffers.get(i));
-      System.arraycopy(bytes, 0, result, offset, bytes.length);
-      offset += bytes.length;
-    }
-    return result;
-  }
-  private static byte[] removeTrailingZeros(byte[] audioData) {
-    int i = audioData.length - 1;
-    while (i >= 0 && audioData[i] == 0) {
-      --i;
-    }
-    byte[] newData = new byte[i + 1];
-    System.arraycopy(audioData, 0, newData, 0, i + 1);
-    return newData;
-  }
-  public static void saveWavFile(byte[] rawData, String audioOutputFile) throws IOException {
-    Log.d(NAME, "call saveWavFile");
-    rawData = removeTrailingZeros(rawData);
-    DataOutputStream output = null;
-    try {
-      output = new DataOutputStream(new FileOutputStream(audioOutputFile));
-      // WAVE header
-      // see http://ccrma.stanford.edu/courses/422/projects/WaveFormat/
-      output.writeBytes("RIFF"); // chunk id
-      output.writeInt(Integer.reverseBytes(36 + rawData.length)); // chunk size
-      output.writeBytes("WAVE"); // format
-      output.writeBytes("fmt "); // subchunk 1 id
-      output.writeInt(Integer.reverseBytes(16)); // subchunk 1 size
-      output.writeShort(Short.reverseBytes((short) 1)); // audio format (1 = PCM)
-      output.writeShort(Short.reverseBytes((short) 1)); // number of channels
-      output.writeInt(Integer.reverseBytes(SAMPLE_RATE)); // sample rate
-      output.writeInt(Integer.reverseBytes(SAMPLE_RATE * 2)); // byte rate
-      output.writeShort(Short.reverseBytes((short) 2)); // block align
-      output.writeShort(Short.reverseBytes((short) 16)); // bits per sample
-      output.writeBytes("data"); // subchunk 2 id
-      output.writeInt(Integer.reverseBytes(rawData.length)); // subchunk 2 size
-      // Audio data (conversion big endian -> little endian)
-      short[] shorts = new short[rawData.length / 2];
-      ByteBuffer.wrap(rawData).order(ByteOrder.LITTLE_ENDIAN).asShortBuffer().get(shorts);
-      ByteBuffer bytes = ByteBuffer.allocate(shorts.length * 2);
-      for (short s : shorts) {
-        bytes.putShort(s);
-      }
-      Log.d(NAME, "writing audio file: " + audioOutputFile);
-      output.write(bytes.array());
-    } finally {
-      if (output != null) {
-        output.close();
-      }
-    }
-  }
   public static float[] decodeWaveFile(InputStream inputStream) throws IOException {
     ByteArrayOutputStream baos = new ByteArrayOutputStream();
     byte[] buffer = new byte[1024];

package/android/src/main/java/com/rnwhisper/WhisperContext.java CHANGED Viewed

@@ -42,7 +42,6 @@ public class WhisperContext {
   private AudioRecord recorder = null;
   private int bufferSize;
   private int nSamplesTranscribing = 0;
-  private ArrayList<short[]> shortBufferSlices;
   // Remember number of samples in each slice
   private ArrayList<Integer> sliceNSamples;
   // Current buffer slice index
@@ -66,7 +65,6 @@ public class WhisperContext {
   }
   private void rewind() {
-    shortBufferSlices = null;
     sliceNSamples = null;
     sliceIndex = 0;
     transcribeSliceIndex = 0;
@@ -79,41 +77,14 @@ public class WhisperContext {
     fullHandler = null;
   }
-  private boolean vad(ReadableMap options, short[] shortBuffer, int nSamples, int n) {
-    boolean isSpeech = true;
-    if (!isTranscribing && options.hasKey("useVad") && options.getBoolean("useVad")) {
-      int vadMs = options.hasKey("vadMs") ? options.getInt("vadMs") : 2000;
-      if (vadMs < 2000) vadMs = 2000;
-      int sampleSize = (int) (SAMPLE_RATE * vadMs / 1000);
-      if (nSamples + n > sampleSize) {
-        int start = nSamples + n - sampleSize;
-        float[] audioData = new float[sampleSize];
-        for (int i = 0; i < sampleSize; i++) {
-          audioData[i] = shortBuffer[i + start] / 32768.0f;
-        }
-        float vadThold = options.hasKey("vadThold") ? (float) options.getDouble("vadThold") : 0.6f;
-        float vadFreqThold = options.hasKey("vadFreqThold") ? (float) options.getDouble("vadFreqThold") : 0.6f;
-        isSpeech = vadSimple(audioData, sampleSize, vadThold, vadFreqThold);
-      } else {
-        isSpeech = false;
-      }
-    }
-    return isSpeech;
+  private boolean vad(int sliceIndex, int nSamples, int n) {
+    if (isTranscribing) return true;
+    return vadSimple(jobId, sliceIndex, nSamples, n);
   }
-  private void finishRealtimeTranscribe(ReadableMap options, WritableMap result) {
-    String audioOutputPath = options.hasKey("audioOutputPath") ? options.getString("audioOutputPath") : null;
-    if (audioOutputPath != null) {
-       // TODO: Append in real time so we don't need to keep all slices & also reduce memory usage
-      Log.d(NAME, "Begin saving wav file to " + audioOutputPath);
-      try {
-        AudioUtils.saveWavFile(AudioUtils.concatShortBuffers(shortBufferSlices), audioOutputPath);
-      } catch (IOException e) {
-        Log.e(NAME, "Error saving wav file: " + e.getMessage());
-      }
-    }
+  private void finishRealtimeTranscribe(WritableMap result) {
     emitTranscribeEvent("@RNWhisper_onRealtimeTranscribeEnd", Arguments.createMap());
+    finishRealtimeTranscribeJob(jobId, context, sliceNSamples.stream().mapToInt(i -> i).toArray());
   }
   public int startRealtimeTranscribe(int jobId, ReadableMap options) {
@@ -135,16 +106,15 @@ public class WhisperContext {
     int realtimeAudioSec = options.hasKey("realtimeAudioSec") ? options.getInt("realtimeAudioSec") : 0;
     final int audioSec = realtimeAudioSec > 0 ? realtimeAudioSec : DEFAULT_MAX_AUDIO_SEC;
     int realtimeAudioSliceSec = options.hasKey("realtimeAudioSliceSec") ? options.getInt("realtimeAudioSliceSec") : 0;
     final int audioSliceSec = realtimeAudioSliceSec > 0 && realtimeAudioSliceSec < audioSec ? realtimeAudioSliceSec : audioSec;
     isUseSlices = audioSliceSec < audioSec;
-    String audioOutputPath = options.hasKey("audioOutputPath") ? options.getString("audioOutputPath") : null;
+    double realtimeAudioMinSec = options.hasKey("realtimeAudioMinSec") ? options.getDouble("realtimeAudioMinSec") : 0;
+    final double audioMinSec = realtimeAudioMinSec > 0.5 && realtimeAudioMinSec <= audioSliceSec ? realtimeAudioMinSec : 1;
+    createRealtimeTranscribeJob(jobId, context, options);
-    shortBufferSlices = new ArrayList<short[]>();
-    shortBufferSlices.add(new short[audioSliceSec * SAMPLE_RATE]);
     sliceNSamples = new ArrayList<Integer>();
     sliceNSamples.add(0);
@@ -175,49 +145,43 @@ public class WhisperContext {
                   nSamples == nSamplesTranscribing &&
                   sliceIndex == transcribeSliceIndex
                 ) {
-                  finishRealtimeTranscribe(options, Arguments.createMap());
+                  finishRealtimeTranscribe(Arguments.createMap());
                 } else if (!isTranscribing) {
-                  short[] shortBuffer = shortBufferSlices.get(sliceIndex);
-                  boolean isSpeech = vad(options, shortBuffer, nSamples, 0);
-                  if (!isSpeech) {
-                    finishRealtimeTranscribe(options, Arguments.createMap());
+                  boolean isSamplesEnough = nSamples / SAMPLE_RATE >= audioMinSec;
+                  if (!isSamplesEnough || !vad(sliceIndex, nSamples, 0)) {
+                    finishRealtimeTranscribe(Arguments.createMap());
                     break;
                   }
                   isTranscribing = true;
-                  fullTranscribeSamples(options, true);
+                  fullTranscribeSamples(true);
                 }
                 break;
               }
               // Append to buffer
-              short[] shortBuffer = shortBufferSlices.get(sliceIndex);
               if (nSamples + n > audioSliceSec * SAMPLE_RATE) {
                 Log.d(NAME, "next slice");
                 sliceIndex++;
                 nSamples = 0;
-                shortBuffer = new short[audioSliceSec * SAMPLE_RATE];
-                shortBufferSlices.add(shortBuffer);
                 sliceNSamples.add(0);
               }
+              putPcmData(jobId, buffer, sliceIndex, nSamples, n);
-              for (int i = 0; i < n; i++) {
-                shortBuffer[nSamples + i] = buffer[i];
-              }
-              boolean isSpeech = vad(options, shortBuffer, nSamples, n);
+              boolean isSpeech = vad(sliceIndex, nSamples, n);
               nSamples += n;
               sliceNSamples.set(sliceIndex, nSamples);
-              if (!isSpeech) continue;
+              boolean isSamplesEnough = nSamples / SAMPLE_RATE >= audioMinSec;
+              if (!isSamplesEnough || !isSpeech) continue;
               if (!isTranscribing && nSamples > SAMPLE_RATE / 2) {
                 isTranscribing = true;
                 fullHandler = new Thread(new Runnable() {
                   @Override
                   public void run() {
-                    fullTranscribeSamples(options, false);
+                    fullTranscribeSamples(false);
                   }
                 });
                 fullHandler.start();
@@ -228,7 +192,7 @@ public class WhisperContext {
           }
           if (!isTranscribing) {
-            finishRealtimeTranscribe(options, Arguments.createMap());
+            finishRealtimeTranscribe(Arguments.createMap());
           }
           if (fullHandler != null) {
             fullHandler.join(); // Wait for full transcribe to finish
@@ -246,26 +210,16 @@ public class WhisperContext {
     return state;
   }
-  private void fullTranscribeSamples(ReadableMap options, boolean skipCapturingCheck) {
+  private void fullTranscribeSamples(boolean skipCapturingCheck) {
     int nSamplesOfIndex = sliceNSamples.get(transcribeSliceIndex);
     if (!isCapturing && !skipCapturingCheck) return;
-    short[] shortBuffer = shortBufferSlices.get(transcribeSliceIndex);
-    int nSamples = sliceNSamples.get(transcribeSliceIndex);
     nSamplesTranscribing = nSamplesOfIndex;
-    // convert I16 to F32
-    float[] nSamplesBuffer32 = new float[nSamplesTranscribing];
-    for (int i = 0; i < nSamplesTranscribing; i++) {
-      nSamplesBuffer32[i] = shortBuffer[i] / 32768.0f;
-    }
     Log.d(NAME, "Start transcribing realtime: " + nSamplesTranscribing);
     int timeStart = (int) System.currentTimeMillis();
-    int code = full(jobId, options, nSamplesBuffer32, nSamplesTranscribing);
+    int code = fullWithJob(jobId, context, transcribeSliceIndex, nSamplesTranscribing);
     int timeEnd = (int) System.currentTimeMillis();
     int timeRecording = (int) (nSamplesTranscribing / SAMPLE_RATE * 1000);
@@ -302,7 +256,7 @@ public class WhisperContext {
     if (isStopped && !continueNeeded) {
       payload.putBoolean("isCapturing", false);
       payload.putBoolean("isStoppedByAction", isStoppedByAction);
-      finishRealtimeTranscribe(options, payload);
+      finishRealtimeTranscribe(payload);
     } else if (code == 0) {
       payload.putBoolean("isCapturing", true);
       emitTranscribeEvent("@RNWhisper_onRealtimeTranscribe", payload);
@@ -313,7 +267,7 @@ public class WhisperContext {
     if (continueNeeded) {
       // If no more capturing, continue transcribing until all slices are transcribed
-      fullTranscribeSamples(options, true);
+      fullTranscribeSamples(true);
     } else if (isStopped) {
       // No next, cleanup
       rewind();
@@ -383,62 +337,30 @@ public class WhisperContext {
     this.jobId = jobId;
     isTranscribing = true;
     float[] audioData = AudioUtils.decodeWaveFile(inputStream);
-    int code = full(jobId, options, audioData, audioData.length);
-    isTranscribing = false;
-    this.jobId = -1;
-    if (code != 0 && code != 999) {
-      throw new Exception("Failed to transcribe the file. Code: " + code);
-    }
-    WritableMap result = getTextSegments(0, getTextSegmentCount(context));
-    result.putBoolean("isAborted", isStoppedByAction);
-    return result;
-  }
-  private int full(int jobId, ReadableMap options, float[] audioData, int audioDataLen) {
     boolean hasProgressCallback = options.hasKey("onProgress") && options.getBoolean("onProgress");
     boolean hasNewSegmentsCallback = options.hasKey("onNewSegments") && options.getBoolean("onNewSegments");
-    return fullTranscribe(
+    int code = fullWithNewJob(
       jobId,
       context,
       // float[] audio_data,
       audioData,
       // jint audio_data_len,
-      audioDataLen,
-      // jint n_threads,
-      options.hasKey("maxThreads") ? options.getInt("maxThreads") : -1,
-      // jint max_context,
-      options.hasKey("maxContext") ? options.getInt("maxContext") : -1,
-      // jint word_thold,
-      options.hasKey("wordThold") ? options.getInt("wordThold") : -1,
-      // jint max_len,
-      options.hasKey("maxLen") ? options.getInt("maxLen") : -1,
-      // jboolean token_timestamps,
-      options.hasKey("tokenTimestamps") ? options.getBoolean("tokenTimestamps") : false,
-      // jint offset,
-      options.hasKey("offset") ? options.getInt("offset") : -1,
-      // jint duration,
-      options.hasKey("duration") ? options.getInt("duration") : -1,
-      // jfloat temperature,
-      options.hasKey("temperature") ? (float) options.getDouble("temperature") : -1.0f,
-      // jfloat temperature_inc,
-      options.hasKey("temperatureInc") ? (float) options.getDouble("temperatureInc") : -1.0f,
-      // jint beam_size,
-      options.hasKey("beamSize") ? options.getInt("beamSize") : -1,
-      // jint best_of,
-      options.hasKey("bestOf") ? options.getInt("bestOf") : -1,
-      // jboolean speed_up,
-      options.hasKey("speedUp") ? options.getBoolean("speedUp") : false,
-      // jboolean translate,
-      options.hasKey("translate") ? options.getBoolean("translate") : false,
-      // jstring language,
-      options.hasKey("language") ? options.getString("language") : "auto",
-      // jstring prompt
-      options.hasKey("prompt") ? options.getString("prompt") : null,
+      audioData.length,
+      // ReadableMap options,
+      options,
       // Callback callback
       hasProgressCallback || hasNewSegmentsCallback ? new Callback(this, hasProgressCallback, hasNewSegmentsCallback) : null
     );
+    isTranscribing = false;
+    this.jobId = -1;
+    if (code != 0 && code != 999) {
+      throw new Exception("Failed to transcribe the file. Code: " + code);
+    }
+    WritableMap result = getTextSegments(0, getTextSegmentCount(context));
+    result.putBoolean("isAborted", isStoppedByAction);
+    return result;
   }
   private WritableMap getTextSegments(int start, int count) {
@@ -557,31 +479,18 @@ public class WhisperContext {
     }
   }
+  // JNI methods
   protected static native long initContext(String modelPath);
   protected static native long initContextWithAsset(AssetManager assetManager, String modelPath);
   protected static native long initContextWithInputStream(PushbackInputStream inputStream);
-  protected static native boolean vadSimple(float[] audio_data, int audio_data_len, float vad_thold, float vad_freq_thold);
-  protected static native int fullTranscribe(
+  protected static native void freeContext(long contextPtr);
+  protected static native int fullWithNewJob(
     int job_id,
     long context,
     float[] audio_data,
     int audio_data_len,
-    int n_threads,
-    int max_context,
-    int word_thold,
-    int max_len,
-    boolean token_timestamps,
-    int offset,
-    int duration,
-    float temperature,
-    float temperature_inc,
-    int beam_size,
-    int best_of,
-    boolean speed_up,
-    boolean translate,
-    String language,
-    String prompt,
+    ReadableMap options,
     Callback Callback
   );
   protected static native void abortTranscribe(int jobId);
@@ -590,5 +499,19 @@ public class WhisperContext {
   protected static native String getTextSegment(long context, int index);
   protected static native int getTextSegmentT0(long context, int index);
   protected static native int getTextSegmentT1(long context, int index);
-  protected static native void freeContext(long contextPtr);
+  protected static native void createRealtimeTranscribeJob(
+    int job_id,
+    long context,
+    ReadableMap options
+  );
+  protected static native void finishRealtimeTranscribeJob(int job_id, long context, int[] sliceNSamples);
+  protected static native boolean vadSimple(int job_id, int slice_index, int n_samples, int n);
+  protected static native void putPcmData(int job_id, short[] buffer, int slice_index, int n_samples, int n);
+  protected static native int fullWithJob(
+    int job_id,
+    long context,
+    int slice_index,
+    int n_samples
+  );
 }

package/android/src/main/jni-utils.h ADDED Viewed

@@ -0,0 +1,76 @@
+#include <jni.h>
+// ReadableMap utils
+namespace readablemap {
+// Typed accessors for a Java map-like object reached through JNI.
+//
+// Each accessor resolves the Java method by name on the runtime class of
+// `readableMap`, builds a temporary Java string for `key`, performs the call
+// and releases the temporaries it created. Getters first ask `hasKey` and
+// return `defaultValue` when the key is absent.
+//
+// The functions are `inline` because they are defined in a header: without
+// it, including this header from more than one translation unit produces
+// multiple-definition errors at link time.
+//
+// None of the accessors inspects a pending Java exception after the call;
+// callers that need that must check it themselves.
+namespace detail {
+// Resolves the instance method `name` with JNI signature `signature` on the
+// runtime class of `obj`. The class local reference is deleted immediately so
+// that repeated lookups within one native call do not accumulate local
+// references; `obj` itself keeps the class alive, so the returned ID stays
+// usable for calls on `obj`.
+inline jmethodID findMethod(JNIEnv *env, jobject obj, const char *name, const char *signature) {
+    jclass objClass = env->GetObjectClass(obj);
+    jmethodID method = env->GetMethodID(objClass, name, signature);
+    env->DeleteLocalRef(objClass);
+    return method;
+}
+}  // namespace detail
+// Returns true when `readableMap.hasKey(key)` reports a non-false value.
+inline bool hasKey(JNIEnv *env, jobject readableMap, const char *key) {
+    jmethodID hasKeyMethod = detail::findMethod(env, readableMap, "hasKey", "(Ljava/lang/String;)Z");
+    jstring jKey = env->NewStringUTF(key);
+    jboolean result = env->CallBooleanMethod(readableMap, hasKeyMethod, jKey);
+    env->DeleteLocalRef(jKey);
+    return result != JNI_FALSE;
+}
+// Returns `readableMap.getInt(key)`, or `defaultValue` when the key is absent.
+inline int getInt(JNIEnv *env, jobject readableMap, const char *key, jint defaultValue) {
+    if (!hasKey(env, readableMap, key)) {
+        return defaultValue;
+    }
+    jmethodID getIntMethod = detail::findMethod(env, readableMap, "getInt", "(Ljava/lang/String;)I");
+    jstring jKey = env->NewStringUTF(key);
+    jint result = env->CallIntMethod(readableMap, getIntMethod, jKey);
+    env->DeleteLocalRef(jKey);
+    return result;
+}
+// Returns `readableMap.getBoolean(key)`, or `defaultValue` when the key is absent.
+inline bool getBool(JNIEnv *env, jobject readableMap, const char *key, jboolean defaultValue) {
+    if (!hasKey(env, readableMap, key)) {
+        return defaultValue;
+    }
+    jmethodID getBoolMethod = detail::findMethod(env, readableMap, "getBoolean", "(Ljava/lang/String;)Z");
+    jstring jKey = env->NewStringUTF(key);
+    jboolean result = env->CallBooleanMethod(readableMap, getBoolMethod, jKey);
+    env->DeleteLocalRef(jKey);
+    return result != JNI_FALSE;
+}
+// Returns `readableMap.getLong(key)`, or `defaultValue` when the key is absent.
+// NOTE(review): the return type is `long`, which can be narrower than `jlong`
+// on 32-bit ABIs; confirm callers do not rely on the full 64-bit range.
+inline long getLong(JNIEnv *env, jobject readableMap, const char *key, jlong defaultValue) {
+    if (!hasKey(env, readableMap, key)) {
+        return static_cast<long>(defaultValue);
+    }
+    jmethodID getLongMethod = detail::findMethod(env, readableMap, "getLong", "(Ljava/lang/String;)J");
+    jstring jKey = env->NewStringUTF(key);
+    jlong result = env->CallLongMethod(readableMap, getLongMethod, jKey);
+    env->DeleteLocalRef(jKey);
+    return static_cast<long>(result);
+}
+// Returns `readableMap.getDouble(key)` narrowed to float, or `defaultValue`
+// when the key is absent. The Java side is queried as a double; the
+// narrowing to float is intentional and made explicit here.
+inline float getFloat(JNIEnv *env, jobject readableMap, const char *key, jfloat defaultValue) {
+    if (!hasKey(env, readableMap, key)) {
+        return defaultValue;
+    }
+    jmethodID getDoubleMethod = detail::findMethod(env, readableMap, "getDouble", "(Ljava/lang/String;)D");
+    jstring jKey = env->NewStringUTF(key);
+    jfloat result = static_cast<jfloat>(env->CallDoubleMethod(readableMap, getDoubleMethod, jKey));
+    env->DeleteLocalRef(jKey);
+    return result;
+}
+// Returns `readableMap.getString(key)`, or `defaultValue` when the key is
+// absent. The result of the Java call is handed back as-is: it is a new local
+// reference owned by the caller and may be null if the Java method returns null.
+inline jstring getString(JNIEnv *env, jobject readableMap, const char *key, jstring defaultValue) {
+    if (!hasKey(env, readableMap, key)) {
+        return defaultValue;
+    }
+    jmethodID getStringMethod = detail::findMethod(env, readableMap, "getString", "(Ljava/lang/String;)Ljava/lang/String;");
+    jstring jKey = env->NewStringUTF(key);
+    jstring result = static_cast<jstring>(env->CallObjectMethod(readableMap, getStringMethod, jKey));
+    env->DeleteLocalRef(jKey);
+    return result;
+}
+}  // namespace readablemap