whisper.rn 0.2.4 → 0.3.0-rc.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +34 -5
- package/android/src/main/java/com/rnwhisper/RNWhisperModule.java +7 -2
- package/android/src/main/java/com/rnwhisper/WhisperContext.java +7 -6
- package/android/src/main/jni/whisper/jni.cpp +54 -7
- package/cpp/ggml.c +6339 -1662
- package/cpp/ggml.h +741 -554
- package/cpp/rn-whisper.cpp +0 -23
- package/cpp/rn-whisper.h +0 -6
- package/cpp/whisper.cpp +928 -625
- package/cpp/whisper.h +26 -2
- package/ios/RNWhisper.mm +19 -1
- package/ios/RNWhisperContext.mm +8 -10
- package/lib/commonjs/index.js +12 -2
- package/lib/commonjs/index.js.map +1 -1
- package/lib/module/index.js +9 -2
- package/lib/module/index.js.map +1 -1
- package/lib/typescript/index.d.ts +7 -2
- package/lib/typescript/index.d.ts.map +1 -1
- package/package.json +1 -1
- package/src/{index.tsx → index.ts} +10 -4
- package/whisper-rn.podspec +9 -3
package/README.md
CHANGED
@@ -20,6 +20,8 @@ npm install whisper.rn
 
 Then re-run `npx pod-install` again for iOS.
 
+For Expo, you will need to prebuild the project before using it. See [Expo guide](https://docs.expo.io/guides/using-libraries/#using-a-library-in-a-expo-project) for more details.
+
 ## Add Microphone Permissions (Optional)
 
 If you want to use realtime transcribe, you need to add the microphone permission to your app.

@@ -31,6 +33,8 @@ Add these lines to ```ios/[YOU_APP_NAME]/info.plist```
 <string>This app requires microphone access in order to transcribe speech</string>
 ```
 
+For tvOS, please note that the microphone is not supported.
+
 ### Android
 Add the following line to ```android/app/src/main/AndroidManifest.xml```
 ```xml

@@ -42,11 +46,12 @@ Add the following line to ```android/app/src/main/AndroidManifest.xml```
 ```js
 import { initWhisper } from 'whisper.rn'
 
-const
-
-
-
+const whisperContext = await initWhisper({
+  filePath: 'file://.../ggml-base.en.bin',
+  isBundleAsset: false, // Set to true to load the model from bundle resources; the filePath will then be like `ggml-base.en.bin`
+})
 
+const sampleFilePath = 'file://.../sample.wav'
 const options = { language: 'en' }
 const { stop, promise } = whisperContext.transcribe(sampleFilePath, options)
 
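The added snippet above introduces the new `isBundleAsset` option. As a minimal sketch (not taken from the package's README), loading a model that ships inside the app bundle instead of the file system might look like this, assuming the same `initWhisper` API shown in the diff:

```ts
import { initWhisper } from 'whisper.rn'

// Sketch: load a model packaged as an Android asset / iOS bundle resource.
// With isBundleAsset: true the filePath is a plain asset name, not a file:// URI.
const bundledContext = await initWhisper({
  filePath: 'ggml-base.en.bin',
  isBundleAsset: true,
})

// Transcription works the same way as with a file-system model.
const { promise } = bundledContext.transcribe('file://.../sample.wav', { language: 'en' })
const transcription = await promise
```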
@@ -74,7 +79,31 @@ subscribe(evt => {
 
 In Android, you may need to request the microphone permission by [`PermissionAndroid`](https://reactnative.dev/docs/permissionsandroid).
 
-The documentation is not ready yet, please see the comments of [index](./src/index.
+The documentation is not ready yet, please see the comments in the [index](./src/index.ts) file for more details at the moment.
+
+## Core ML support
+
+__*Platform: iOS 15.0+, tvOS 15.0+*__
+
+To use Core ML on iOS, you will need to have the Core ML model files.
+
+The `.mlmodelc` model files are loaded based on the ggml model file path. For example, if your ggml model path is `ggml-base.en.bin`, the Core ML model path will be `ggml-base.en-encoder.mlmodelc`. Please note that the ggml model is still needed as decoder or encoder fallback.
+
+Currently there is no official way to get the Core ML models by URL; you will need to convert the ggml model to a Core ML model folder yourself. Please see [Core ML Support](https://github.com/ggerganov/whisper.cpp#core-ml-support) of whisper.cpp for more details.
+
+Since `.mlmodelc` is a directory, you will need to download 5 files:
+
+```json5
+[
+  'model.mil',
+  'metadata.json',
+  'coremldata.bin',
+  'weights/weights.bin',
+  'analysis/coremldata.bin',
+]
+```
+
+Or just add them to your app's bundle resources, like the example app does, but this would increase the app size significantly.
 
 ## Run with example
 
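As a small illustrative sketch (not part of the package), the naming rule described in the Core ML section above, where `ggml-base.en.bin` pairs with `ggml-base.en-encoder.mlmodelc`, could be expressed like this; the helper name is hypothetical:

```ts
// Hypothetical helper: derive the Core ML encoder directory and the 5 files
// it must contain from a ggml model path such as 'file://.../ggml-base.en.bin'.
const coreMLFilesFor = (ggmlModelPath: string): string[] => {
  const mlmodelcDir = ggmlModelPath.replace(/\.bin$/, '-encoder.mlmodelc')
  return [
    'model.mil',
    'metadata.json',
    'coremldata.bin',
    'weights/weights.bin',
    'analysis/coremldata.bin',
  ].map((file) => `${mlmodelcDir}/${file}`)
}

// coreMLFilesFor('file://.../ggml-base.en.bin')
// -> ['file://.../ggml-base.en-encoder.mlmodelc/model.mil', ...]
```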
package/android/src/main/java/com/rnwhisper/RNWhisperModule.java
CHANGED

@@ -40,14 +40,19 @@ public class RNWhisperModule extends ReactContextBaseJavaModule implements Lifec
   private HashMap<Integer, WhisperContext> contexts = new HashMap<>();
 
   @ReactMethod
-  public void initContext(final String modelPath, final Promise promise) {
+  public void initContext(final String modelPath, final boolean isBundleAsset, final Promise promise) {
     new AsyncTask<Void, Void, Integer>() {
       private Exception exception;
 
       @Override
       protected Integer doInBackground(Void... voids) {
         try {
-          long context
+          long context;
+          if (isBundleAsset) {
+            context = WhisperContext.initContextWithAsset(reactContext.getAssets(), modelPath);
+          } else {
+            context = WhisperContext.initContext(modelPath);
+          }
           if (context == 0) {
             throw new Exception("Failed to initialize context");
           }
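On the JS side (see the `lib/` and `src/index.ts` entries in the file list above), the wrapper presumably forwards the new flag to this native method. A minimal sketch, assuming the module is registered as `NativeModules.RNWhisper` and that `initContext` resolves with a context id; both are assumptions, not taken from this diff:

```ts
import { NativeModules } from 'react-native'

// Assumption: the Java module above is exposed under the name 'RNWhisper'.
const { RNWhisper } = NativeModules

// Mirrors the new native signature: initContext(modelPath, isBundleAsset, promise).
async function initContextSketch(filePath: string, isBundleAsset = false): Promise<number> {
  return RNWhisper.initContext(filePath, isBundleAsset)
}
```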
package/android/src/main/java/com/rnwhisper/WhisperContext.java
CHANGED

@@ -186,8 +186,6 @@ public class WhisperContext {
       if (fullHandler != null) {
         fullHandler.join(); // Wait for full transcribe to finish
       }
-      // Cleanup
-      resetRealtimeTranscribe();
       recorder.stop();
     } catch (Exception e) {
       e.printStackTrace();
@@ -237,12 +235,11 @@
         }
 
         nSamplesOfIndex = sliceNSamples.get(transcribeSliceIndex);
-
-        isStoppedByAction ||
+        boolean isStopped = isStoppedByAction ||
           !isCapturing &&
           nSamplesTranscribing == nSamplesOfIndex &&
-          sliceIndex == transcribeSliceIndex
-        ) {
+          sliceIndex == transcribeSliceIndex;
+        if (isStopped) {
           payload.putBoolean("isCapturing", false);
           payload.putBoolean("isStoppedByAction", isStoppedByAction);
           emitTranscribeEvent("@RNWhisper_onRealtimeTranscribeEnd", payload);
@@ -266,6 +263,9 @@ public class WhisperContext {
         if (!isCapturing && nSamplesTranscribing != nSamplesOfIndex) {
           // If no more capturing, continue transcribing until all slices are transcribed
           fullTranscribeSamples(options, true);
+        } else if (isStopped) {
+          // No next, cleanup
+          resetRealtimeTranscribe();
         }
         isTranscribing = false;
       }
@@ -469,6 +469,7 @@
   }
 
   protected static native long initContext(String modelPath);
+  protected static native long initContextWithAsset(AssetManager assetManager, String modelPath);
   protected static native int fullTranscribe(
     int job_id,
     long context,
package/android/src/main/jni/whisper/jni.cpp
CHANGED

@@ -20,6 +20,39 @@ static inline int min(int a, int b) {
     return (a < b) ? a : b;
 }
 
+static size_t asset_read(void *ctx, void *output, size_t read_size) {
+    return AAsset_read((AAsset *) ctx, output, read_size);
+}
+
+static bool asset_is_eof(void *ctx) {
+    return AAsset_getRemainingLength64((AAsset *) ctx) <= 0;
+}
+
+static void asset_close(void *ctx) {
+    AAsset_close((AAsset *) ctx);
+}
+
+static struct whisper_context *whisper_init_from_asset(
+    JNIEnv *env,
+    jobject assetManager,
+    const char *asset_path
+) {
+    LOGI("Loading model from asset '%s'\n", asset_path);
+    AAssetManager *asset_manager = AAssetManager_fromJava(env, assetManager);
+    AAsset *asset = AAssetManager_open(asset_manager, asset_path, AASSET_MODE_STREAMING);
+    if (!asset) {
+        LOGW("Failed to open '%s'\n", asset_path);
+        return NULL;
+    }
+    whisper_model_loader loader = {
+        .context = asset,
+        .read = &asset_read,
+        .eof = &asset_is_eof,
+        .close = &asset_close
+    };
+    return whisper_init(&loader);
+}
+
 extern "C" {
 
 JNIEXPORT jlong JNICALL
@@ -33,6 +66,22 @@ Java_com_rnwhisper_WhisperContext_initContext(
     return reinterpret_cast<jlong>(context);
 }
 
+JNIEXPORT jlong JNICALL
+Java_com_rnwhisper_WhisperContext_initContextWithAsset(
+    JNIEnv *env,
+    jobject thiz,
+    jobject asset_manager,
+    jstring model_path_str
+) {
+    UNUSED(thiz);
+    struct whisper_context *context = nullptr;
+    const char *model_path_chars = env->GetStringUTFChars(model_path_str, nullptr);
+    context = whisper_init_from_asset(env, asset_manager, model_path_chars);
+    env->ReleaseStringUTFChars(model_path_str, model_path_chars);
+    return reinterpret_cast<jlong>(context);
+}
+
+
 JNIEXPORT jint JNICALL
 Java_com_rnwhisper_WhisperContext_fullTranscribe(
     JNIEnv *env,
@@ -61,7 +110,9 @@ Java_com_rnwhisper_WhisperContext_fullTranscribe(
     struct whisper_context *context = reinterpret_cast<struct whisper_context *>(context_ptr);
     jfloat *audio_data_arr = env->GetFloatArrayElements(audio_data, nullptr);
 
-    int max_threads =
+    int max_threads = std::thread::hardware_concurrency();
+    // Use 2 threads by default on 4-core devices, 4 threads on more cores
+    int default_n_threads = max_threads == 4 ? 2 : min(4, max_threads);
 
     LOGI("About to create params");
 
@@ -79,7 +130,7 @@
     params.translate = translate;
     const char *language_chars = env->GetStringUTFChars(language, nullptr);
     params.language = language_chars;
-    params.n_threads = n_threads > 0 ? n_threads :
+    params.n_threads = n_threads > 0 ? n_threads : default_n_threads;
     params.speed_up = speed_up;
     params.offset_ms = 0;
     params.no_context = true;
@@ -112,11 +163,7 @@ Java_com_rnwhisper_WhisperContext_fullTranscribe(
         params.temperature_inc = temperature_inc;
     }
     if (prompt != nullptr) {
-
-            context,
-            params,
-            new std::string(env->GetStringUTFChars(prompt, nullptr))
-        );
+        params.initial_prompt = env->GetStringUTFChars(prompt, nullptr);
     }
 
     params.encoder_begin_callback = [](struct whisper_context * /*ctx*/, struct whisper_state * /*state*/, void * user_data) {