whisper.rn 0.3.0-rc.4 → 0.3.0-rc.6

package/README.md CHANGED
@@ -8,9 +8,12 @@ React Native binding of [whisper.cpp](https://github.com/ggerganov/whisper.cpp).
 
 [whisper.cpp](https://github.com/ggerganov/whisper.cpp): High-performance inference of [OpenAI's Whisper](https://github.com/openai/whisper) automatic speech recognition (ASR) model
 
-<img src="https://user-images.githubusercontent.com/3001525/225511664-8b2ba3ec-864d-4f55-bcb0-447aef168a32.jpeg" width="500" />
+## Screenshots
 
-> Run example with release mode on iPhone 13 Pro Max
+| <img src="https://github.com/mybigday/whisper.rn/assets/3001525/2fea7b2d-c911-44fb-9afc-8efc7b594446" width="300" /> | <img src="https://github.com/mybigday/whisper.rn/assets/3001525/a5005a6c-44f7-4db9-95e8-0fd951a2e147" width="300" /> |
+| :------------------------------------------: | :------------------------------------------: |
+| iOS: Tested on iPhone 13 Pro Max | Android: Tested on Pixel 6 |
+| (tiny.en, Core ML enabled) | (tiny.en, armv8.2-a+fp16) |
 
 ## Installation
 
@@ -47,8 +50,7 @@ Add the following line to ```android/app/src/main/AndroidManifest.xml```
 import { initWhisper } from 'whisper.rn'
 
 const whisperContext = await initWhisper({
-  filePath: 'file://.../ggml-base.en.bin',
-  isBundleAsset: false, // Set to true if you want to load the model from bundle resources, the filePath will be like `ggml-base.en.bin`
+  filePath: 'file://.../ggml-tiny.en.bin',
 })
 
 const sampleFilePath = 'file://.../sample.wav'
@@ -81,35 +83,92 @@ In Android, you may need to request the microphone permission by [`PermissionAnd
 
 Please visit the [Documentation](docs/) for more details.
 
+## Usage with assets
+
+You can also use the model file / audio file from assets:
+
+```js
+import { initWhisper } from 'whisper.rn'
+
+const whisperContext = await initWhisper({
+  filePath: require('../assets/ggml-tiny.en.bin'),
+})
+
+const { stop, promise } =
+  whisperContext.transcribe(require('../assets/sample.wav'), options)
+
+// ...
+```
+
+This requires editing the `metro.config.js` to support assets:
+
+```js
+// ...
+const defaultAssetExts = require('metro-config/src/defaults/defaults').assetExts
+
+module.exports = {
+  // ...
+  resolver: {
+    // ...
+    assetExts: [
+      ...defaultAssetExts,
+      'bin', // whisper.rn: ggml model binary
+      'mil', // whisper.rn: CoreML model asset
+    ]
+  },
+}
+```
+
+Please note that it will significantly increase the size of the app in release mode.
+
 ## Core ML support
 
 __*Platform: iOS 15.0+, tvOS 15.0+*__
 
 To use Core ML on iOS, you will need to have the Core ML model files.
 
-The `.mlmodelc` model files is load depend on the ggml model file path. For example, if your ggml model path is `ggml-base.en.bin`, the Core ML model path will be `ggml-base.en-encoder.mlmodelc`. Please note that the ggml model is still needed as decoder or encoder fallback.
+The `.mlmodelc` model files is load depend on the ggml model file path. For example, if your ggml model path is `ggml-tiny.en.bin`, the Core ML model path will be `ggml-tiny.en-encoder.mlmodelc`. Please note that the ggml model is still needed as decoder or encoder fallback.
 
 Currently there is no official way to get the Core ML models by URL, you will need to convert Core ML models by yourself. Please see [Core ML Support](https://github.com/ggerganov/whisper.cpp#core-ml-support) of whisper.cpp for more details.
 
-During the `.mlmodelc` is a directory, you will need to download 5 files:
+During the `.mlmodelc` is a directory, you will need to download 5 files (3 required):
 
 ```json5
 [
   'model.mil',
-  'metadata.json',
   'coremldata.bin',
   'weights/weight.bin',
-  'analytics/coremldata.bin',
+  // Not required:
+  // 'metadata.json', 'analytics/coremldata.bin',
 ]
 ```
 
-Or just add them to your app's bundle resources, like the example app does, but this would increase the app size significantly.
+Or just use `require` to bundle that in your app, like the example app does, but this would increase the app size significantly.
+
+```js
+const whisperContext = await initWhisper({
+  filePath: require('../assets/ggml-tiny.en.bin')
+  coreMLModelAsset:
+    Platform.OS === 'ios'
+      ? {
+          filename: 'ggml-tiny.en-encoder.mlmodelc',
+          assets: [
+            require('../assets/ggml-tiny.en-encoder.mlmodelc/weights/weight.bin'),
+            require('../assets/ggml-tiny.en-encoder.mlmodelc/model.mil'),
+            require('../assets/ggml-tiny.en-encoder.mlmodelc/coremldata.bin'),
+          ],
+        }
+      : undefined,
+})
+```
+
+In real world, we recommended to split the asset imports into another platform specific file (e.g. `context-opts.ios.js`) to avoid these unused files in the bundle for Android.
 
 ## Run with example
 
-The example app is using [react-native-fs](https://github.com/itinance/react-native-fs) to download the model file and audio file.
+The example app provide a simple UI for testing the functions.
 
-Model: `base.en` in https://huggingface.co/datasets/ggerganov/whisper.cpp
+Used Whisper model: `tiny.en` in https://huggingface.co/datasets/ggerganov/whisper.cpp
 
 Sample file: `jfk.wav` in https://github.com/ggerganov/whisper.cpp/tree/master/samples
 
 For test better performance on transcribe, you can run the app in Release mode.
@@ -130,6 +189,10 @@ jest.mock('whisper.rn', () => require('whisper.rn/jest/mock'))
 
 See the [contributing guide](CONTRIBUTING.md) to learn how to contribute to the repository and the development workflow.
 
+## Troubleshooting
+
+See the [troubleshooting](TROUBLESHOOTING.md) if you encounter any problem while using `whisper.rn`.
+
 ## License
 
 MIT
@@ -59,6 +59,15 @@ android {
     targetCompatibility JavaVersion.VERSION_1_8
   }
 
+  sourceSets {
+    main {
+      if (isNewArchitectureEnabled()) {
+        java.srcDirs += ['src/newarch']
+      } else {
+        java.srcDirs += ['src/oldarch']
+      }
+    }
+  }
 }
 
 repositories {
@@ -0,0 +1,83 @@
+package com.rnwhisper;
+
+import android.content.Context;
+
+import java.io.BufferedInputStream;
+import java.io.FileOutputStream;
+import java.io.File;
+import java.io.InputStream;
+import java.io.OutputStream;
+import java.net.URL;
+import java.net.URLConnection;
+
+/**
+ * NOTE: This is simple downloader,
+ * the main purpose is supported load assets on RN Debug mode,
+ * so it's a very crude implementation.
+ *
+ * If you want to use file download in production to load model / audio files,
+ * I would recommend using react-native-fs or expo-file-system to manage the files.
+ */
+public class Downloader {
+  private static Context context;
+
+  public Downloader(Context context) {
+    this.context = context;
+  }
+
+  private String getDir() {
+    String dir = context.getCacheDir().getAbsolutePath() + "/rnwhisper_debug_assets/";
+    File file = new File(dir);
+    if (!file.exists()) {
+      file.mkdirs();
+    }
+    return dir;
+  }
+
+  private boolean fileExists(String filename) {
+    File file = new File(getDir() + filename);
+    return file.exists();
+  }
+
+  public String downloadFile(String urlPath) throws Exception {
+    String filename = urlPath.substring(urlPath.lastIndexOf('/') + 1);
+    if (filename.contains("?")) {
+      filename = filename.substring(0, filename.indexOf("?"));
+    }
+    String filepath = getDir() + filename;
+    if (fileExists(filename)) {
+      return filepath;
+    }
+    try {
+      URL url = new URL(urlPath);
+      URLConnection connection = url.openConnection();
+      connection.connect();
+      InputStream input = new BufferedInputStream(url.openStream());
+      OutputStream output = new FileOutputStream(filepath);
+      byte data[] = new byte[1024];
+      int count;
+      while ((count = input.read(data)) != -1) {
+        output.write(data, 0, count);
+      }
+      output.flush();
+      output.close();
+      input.close();
+    } catch (Exception e) {
+      throw e;
+    }
+    return filepath;
+  }
+
+  private void deleteFile(File fileOrDir) {
+    if (fileOrDir.isDirectory()) {
+      for (File child : fileOrDir.listFiles()) {
+        deleteFile(child);
+      }
+    }
+    fileOrDir.delete();
+  }
+
+  public void clearCache() {
+    deleteFile(new File(getDir()));
+  }
+}
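The filename logic in `downloadFile` above (take the segment after the last `/`, then drop any `?query` suffix) determines the cache key for a downloaded asset. A minimal standalone sketch of just that step, with `deriveFilename` as a hypothetical helper name not present in the package:

```java
public class DeriveFilename {
    // Mirrors the filename derivation in Downloader.downloadFile:
    // basename of the URL path, with any query string stripped.
    static String deriveFilename(String urlPath) {
        String filename = urlPath.substring(urlPath.lastIndexOf('/') + 1);
        if (filename.contains("?")) {
            filename = filename.substring(0, filename.indexOf("?"));
        }
        return filename;
    }

    public static void main(String[] args) {
        // prints "ggml-tiny.en.bin"
        System.out.println(deriveFilename("https://example.com/models/ggml-tiny.en.bin?download=true"));
    }
}
```

Because the derived filename is the only cache key, two URLs ending in the same basename would collide in `rnwhisper_debug_assets/`; the class comment is explicit that this is acceptable for a debug-only downloader.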
@@ -1,28 +1,48 @@
 package com.rnwhisper;
 
 import androidx.annotation.NonNull;
+import androidx.annotation.Nullable;
 
-import com.facebook.react.ReactPackage;
 import com.facebook.react.bridge.NativeModule;
 import com.facebook.react.bridge.ReactApplicationContext;
-import com.facebook.react.uimanager.ViewManager;
+import com.facebook.react.module.model.ReactModuleInfo;
+import com.facebook.react.module.model.ReactModuleInfoProvider;
+import com.facebook.react.TurboReactPackage;
 
-import java.util.ArrayList;
-import java.util.Collections;
 import java.util.List;
+import java.util.HashMap;
+import java.util.Map;
 
-public class RNWhisperPackage implements ReactPackage {
-  @NonNull
+public class RNWhisperPackage extends TurboReactPackage {
+
+  @Nullable
   @Override
-  public List<NativeModule> createNativeModules(@NonNull ReactApplicationContext reactContext) {
-    List<NativeModule> modules = new ArrayList<>();
-    modules.add(new RNWhisperModule(reactContext));
-    return modules;
+  public NativeModule getModule(String name, ReactApplicationContext reactContext) {
+    if (name.equals(RNWhisperModule.NAME)) {
+      return new com.rnwhisper.RNWhisperModule(reactContext);
+    } else {
+      return null;
+    }
   }
 
-  @NonNull
   @Override
-  public List<ViewManager> createViewManagers(@NonNull ReactApplicationContext reactContext) {
-    return Collections.emptyList();
+  public ReactModuleInfoProvider getReactModuleInfoProvider() {
+    return () -> {
+      final Map<String, ReactModuleInfo> moduleInfos = new HashMap<>();
+      boolean isTurboModule = BuildConfig.IS_NEW_ARCHITECTURE_ENABLED;
+      moduleInfos.put(
+        RNWhisperModule.NAME,
+        new ReactModuleInfo(
+          RNWhisperModule.NAME,
+          RNWhisperModule.NAME,
+          false, // canOverrideExistingModule
+          false, // needsEagerInit
+          true, // hasConstants
+          false, // isCxxModule
+          isTurboModule // isTurboModule
+        )
+      );
+      return moduleInfos;
+    };
  }
 }
@@ -26,6 +26,7 @@ import java.io.File;
 import java.io.FileInputStream;
 import java.io.IOException;
 import java.io.InputStream;
+import java.io.PushbackInputStream;
 import java.nio.ByteBuffer;
 import java.nio.ByteOrder;
 import java.nio.ShortBuffer;
@@ -281,10 +282,10 @@ public class WhisperContext {
     eventEmitter.emit(eventName, event);
   }
 
-  public WritableMap transcribeFile(int jobId, String filePath, ReadableMap options) throws IOException, Exception {
+  public WritableMap transcribeInputStream(int jobId, InputStream inputStream, ReadableMap options) throws IOException, Exception {
     this.jobId = jobId;
     isTranscribing = true;
-    float[] audioData = decodeWaveFile(new File(filePath));
+    float[] audioData = decodeWaveFile(inputStream);
     int code = full(jobId, options, audioData, audioData.length);
     isTranscribing = false;
     this.jobId = -1;
@@ -383,14 +384,12 @@ public class WhisperContext {
     freeContext(context);
   }
 
-  public static float[] decodeWaveFile(File file) throws IOException {
+  public static float[] decodeWaveFile(InputStream inputStream) throws IOException {
     ByteArrayOutputStream baos = new ByteArrayOutputStream();
-    try (InputStream inputStream = new FileInputStream(file)) {
-      byte[] buffer = new byte[1024];
-      int bytesRead;
-      while ((bytesRead = inputStream.read(buffer)) != -1) {
-        baos.write(buffer, 0, bytesRead);
-      }
+    byte[] buffer = new byte[1024];
+    int bytesRead;
+    while ((bytesRead = inputStream.read(buffer)) != -1) {
+      baos.write(buffer, 0, bytesRead);
     }
     ByteBuffer byteBuffer = ByteBuffer.wrap(baos.toByteArray());
     byteBuffer.order(ByteOrder.LITTLE_ENDIAN);
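After buffering the stream, `decodeWaveFile` wraps the bytes in a little-endian `ByteBuffer` and (per the `ShortBuffer` import) reads them as 16-bit PCM samples converted to floats. A standalone sketch of that conversion step; the `32767.0f` scale factor is an assumption here, as the actual divisor is outside the shown hunk:

```java
import java.nio.ByteBuffer;
import java.nio.ByteOrder;
import java.nio.ShortBuffer;

public class Pcm16 {
    // Interpret little-endian bytes as 16-bit PCM samples and scale
    // to float. NOTE: the 32767 divisor is an assumption; the
    // library's exact normalization may differ.
    static float[] pcm16ToFloat(byte[] bytes) {
        ByteBuffer bb = ByteBuffer.wrap(bytes);
        bb.order(ByteOrder.LITTLE_ENDIAN);
        ShortBuffer sb = bb.asShortBuffer();
        float[] out = new float[sb.remaining()];
        for (int i = 0; i < out.length; i++) {
            out[i] = sb.get(i) / 32767.0f;
        }
        return out;
    }

    public static void main(String[] args) {
        // Two samples: 0x0000 and 0x7FFF (little-endian byte order).
        float[] out = pcm16ToFloat(new byte[]{0, 0, (byte) 0xFF, 0x7F});
        System.out.println(out[0] + " " + out[1]); // prints "0.0 1.0"
    }
}
```

The refactor from `File` to `InputStream` is what lets the same decoder serve both downloaded files and bundled assets.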
@@ -472,6 +471,7 @@ public class WhisperContext {
 
   protected static native long initContext(String modelPath);
   protected static native long initContextWithAsset(AssetManager assetManager, String modelPath);
+  protected static native long initContextWithInputStream(PushbackInputStream inputStream);
   protected static native int fullTranscribe(
     int job_id,
     long context,
@@ -3,7 +3,7 @@ LOCAL_LDLIBS := -landroid -llog
 
 # Make the final output library smaller by only keeping the symbols referenced from the app.
 ifneq ($(APP_OPTIM),debug)
-    LOCAL_CFLAGS += -O3
+    LOCAL_CFLAGS += -O3 -DNDEBUG
    LOCAL_CFLAGS += -fvisibility=hidden -fvisibility-inlines-hidden
    LOCAL_CFLAGS += -ffunction-sections -fdata-sections
    LOCAL_LDFLAGS += -Wl,--gc-sections
@@ -20,6 +20,97 @@ static inline int min(int a, int b) {
     return (a < b) ? a : b;
 }
 
+// Load model from input stream (used for drawable / raw resources)
+struct input_stream_context {
+    JNIEnv *env;
+    jobject input_stream;
+};
+
+static size_t input_stream_read(void *ctx, void *output, size_t read_size) {
+    input_stream_context *context = (input_stream_context *)ctx;
+    JNIEnv *env = context->env;
+    jobject input_stream = context->input_stream;
+    jclass input_stream_class = env->GetObjectClass(input_stream);
+
+    jbyteArray buffer = env->NewByteArray(read_size);
+    jint bytes_read = env->CallIntMethod(
+        input_stream,
+        env->GetMethodID(input_stream_class, "read", "([B)I"),
+        buffer
+    );
+
+    if (bytes_read > 0) {
+        env->GetByteArrayRegion(buffer, 0, bytes_read, (jbyte *) output);
+    }
+
+    env->DeleteLocalRef(buffer);
+
+    return bytes_read;
+}
+
+static bool input_stream_is_eof(void *ctx) {
+    input_stream_context *context = (input_stream_context *)ctx;
+    JNIEnv *env = context->env;
+    jobject input_stream = context->input_stream;
+
+    jclass input_stream_class = env->GetObjectClass(input_stream);
+
+    jbyteArray buffer = env->NewByteArray(1);
+    jint bytes_read = env->CallIntMethod(
+        input_stream,
+        env->GetMethodID(input_stream_class, "read", "([B)I"),
+        buffer
+    );
+
+    bool is_eof = (bytes_read == -1);
+    if (!is_eof) {
+        // If we successfully read a byte, "unread" it by pushing it back into the stream.
+        env->CallVoidMethod(
+            input_stream,
+            env->GetMethodID(input_stream_class, "unread", "([BII)V"),
+            buffer,
+            0,
+            1
+        );
+    }
+
+    env->DeleteLocalRef(buffer);
+
+    return is_eof;
+}
+
+static void input_stream_close(void *ctx) {
+    input_stream_context *context = (input_stream_context *)ctx;
+    JNIEnv *env = context->env;
+    jobject input_stream = context->input_stream;
+    jclass input_stream_class = env->GetObjectClass(input_stream);
+
+    env->CallVoidMethod(
+        input_stream,
+        env->GetMethodID(input_stream_class, "close", "()V")
+    );
+
+    env->DeleteGlobalRef(input_stream);
+}
+
+static struct whisper_context *whisper_init_from_input_stream(
+    JNIEnv *env,
+    jobject input_stream // PushbackInputStream
+) {
+    input_stream_context *context = new input_stream_context;
+    context->env = env;
+    context->input_stream = env->NewGlobalRef(input_stream);
+
+    whisper_model_loader loader = {
+        .context = context,
+        .read = &input_stream_read,
+        .eof = &input_stream_is_eof,
+        .close = &input_stream_close
+    };
+    return whisper_init(&loader);
+}
+
+// Load model from asset
 static size_t asset_read(void *ctx, void *output, size_t read_size) {
     return AAsset_read((AAsset *) ctx, output, read_size);
 }
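The `input_stream_is_eof` callback above is built on a round trip through the Java `PushbackInputStream`: read one byte to probe for end-of-stream, then `unread` it so the stream position is unchanged when data was actually available. The same trick written directly in Java (a sketch for illustration, not code from the package):

```java
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.PushbackInputStream;

public class EofProbe {
    // Probe for EOF without consuming data: read one byte, and if the
    // read succeeded, push it back into the stream's pushback buffer.
    static boolean isEof(PushbackInputStream in) throws IOException {
        int b = in.read();
        if (b == -1) {
            return true; // nothing left to read
        }
        in.unread(b); // restore the byte so the next read() sees it
        return false;
    }

    public static void main(String[] args) throws IOException {
        PushbackInputStream in =
            new PushbackInputStream(new ByteArrayInputStream(new byte[]{7}));
        System.out.println(isEof(in)); // prints "false"
        System.out.println(in.read()); // prints "7"
        System.out.println(isEof(in)); // prints "true"
    }
}
```

This is why the native method takes a `PushbackInputStream` specifically: a plain `InputStream` has no `unread`, so a non-destructive EOF check would not be possible through the generic `read`-only interface.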
@@ -81,6 +172,17 @@ Java_com_rnwhisper_WhisperContext_initContextWithAsset(
     return reinterpret_cast<jlong>(context);
 }
 
+JNIEXPORT jlong JNICALL
+Java_com_rnwhisper_WhisperContext_initContextWithInputStream(
+    JNIEnv *env,
+    jobject thiz,
+    jobject input_stream
+) {
+    UNUSED(thiz);
+    struct whisper_context *context = nullptr;
+    context = whisper_init_from_input_stream(env, input_stream);
+    return reinterpret_cast<jlong>(context);
+}
 
 JNIEXPORT jint JNICALL
 Java_com_rnwhisper_WhisperContext_fullTranscribe(