whisper.rn 0.3.4 → 0.3.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -21,13 +21,26 @@ React Native binding of [whisper.cpp](https://github.com/ggerganov/whisper.cpp).
21
21
  npm install whisper.rn
22
22
  ```
23
23
 
24
- For iOS, please re-run `npx pod-install` again.
24
+ #### iOS
25
+
26
+ Please run `npx pod-install` again.
27
+
28
+ #### Android
25
29
 
26
30
  If you want to use the `medium` or `large` model, it is recommended to enable the [Extended Virtual Addressing](https://developer.apple.com/documentation/bundleresources/entitlements/com_apple_developer_kernel_extended-virtual-addressing) capability in your iOS project.
27
31
 
28
32
  For Android, it's recommended to use `ndkVersion = "24.0.8215888"` (or above) in your root project build configuration for Apple Silicon Macs. Otherwise, please follow this troubleshooting [issue](./TROUBLESHOOTING.md#android-got-build-error-unknown-host-cpu-architecture-arm64-on-apple-silicon-macs).
29
33
 
30
- For Expo, you will need to prebuild the project before using it. See [Expo guide](https://docs.expo.io/guides/using-libraries/#using-a-library-in-a-expo-project) for more details.
34
+ Don't forget to add the ProGuard rule if ProGuard is enabled in your project (android/app/proguard-rules.pro):
35
+
36
+ ```proguard
37
+ # whisper.rn
38
+ -keep class com.rnwhisper.** { *; }
39
+ ```
40
+
41
+ #### Expo
42
+
43
+ You will need to prebuild the project before using it. See [Expo guide](https://docs.expo.io/guides/using-libraries/#using-a-library-in-a-expo-project) for more details.
31
44
 
32
45
  ## Add Microphone Permissions (Optional)
33
46
 
@@ -30,6 +30,11 @@ def getExtOrIntegerDefault(name) {
30
30
  return rootProject.ext.has(name) ? rootProject.ext.get(name) : (project.properties["RNWhisper_" + name]).toInteger()
31
31
  }
32
32
 
33
// Resolves the ABIs to build native code for: the comma-separated
// `reactNativeArchitectures` project property when it is set, otherwise the
// four ABIs listed below.
def reactNativeArchitectures() {
  def configured = project.getProperties().get("reactNativeArchitectures")
  if (configured) {
    return configured.split(",")
  }
  return ["armeabi-v7a", "x86", "x86_64", "arm64-v8a"]
}
37
+
33
38
  android {
34
39
  ndkVersion getExtOrDefault("ndkVersion")
35
40
  compileSdkVersion getExtOrIntegerDefault("compileSdkVersion")
@@ -38,10 +43,15 @@ android {
38
43
  minSdkVersion getExtOrIntegerDefault("minSdkVersion")
39
44
  targetSdkVersion getExtOrIntegerDefault("targetSdkVersion")
40
45
  buildConfigField "boolean", "IS_NEW_ARCHITECTURE_ENABLED", isNewArchitectureEnabled().toString()
46
+ externalNativeBuild {
47
+ cmake {
48
+ abiFilters (*reactNativeArchitectures())
49
+ }
50
+ }
41
51
  }
42
52
  externalNativeBuild {
43
- ndkBuild {
44
- path 'src/main/jni/whisper/Android.mk'
53
+ cmake {
54
+ path = file('src/main/CMakeLists.txt')
45
55
  }
46
56
  }
47
57
  buildTypes {
@@ -0,0 +1,55 @@
1
# Native build script for the whisper.rn Android library.
#
# Builds the JNI shared library `whisper`, plus one CPU-tuned variant chosen by
# the ABI being configured (`whisper_v8fp16_va` on arm64-v8a, `whisper_vfpv4`
# on armeabi-v7a).

# target_link_options() (used below) was added in CMake 3.13, so that is the
# real minimum this file can be configured with.
cmake_minimum_required(VERSION 3.13)

project(whisper.rn)

set(CMAKE_CXX_STANDARD 11)
set(CMAKE_CXX_STANDARD_REQUIRED ON)

# The shared C/C++ sources live in the `cpp` directory three levels above this
# file. CMAKE_CURRENT_SOURCE_DIR keeps the path correct even if this list file
# is ever pulled in through add_subdirectory().
set(RNWHISPER_LIB_DIR ${CMAKE_CURRENT_SOURCE_DIR}/../../../cpp)

set(
    SOURCE_FILES
    ${RNWHISPER_LIB_DIR}/ggml.c
    ${RNWHISPER_LIB_DIR}/whisper.cpp
    ${RNWHISPER_LIB_DIR}/rn-whisper.cpp
    ${CMAKE_CURRENT_SOURCE_DIR}/jni.cpp
)

find_library(LOG_LIB log)

# build_library(<target_name>)
#
# Creates a shared library named <target_name> from SOURCE_FILES, links it
# against the `log` and `android` libraries and applies the common
# optimisation/size flags. Two target names get extra architecture flags:
#   whisper_v8fp16_va -> -march=armv8.2-a+fp16
#   whisper_vfpv4     -> -mfpu=neon-vfpv4
#
# Example: build_library("whisper")
function(build_library target_name)
    add_library(
        ${target_name}
        SHARED
        ${SOURCE_FILES}
    )

    # Target-scoped include path (replaces a directory-wide include_directories).
    target_include_directories(${target_name} PRIVATE ${RNWHISPER_LIB_DIR})

    target_link_libraries(${target_name} PRIVATE ${LOG_LIB} android)

    # Quoted so the comparison is always string-vs-string and never an error
    # or a second variable dereference.
    if ("${target_name}" STREQUAL "whisper_v8fp16_va")
        target_compile_options(${target_name} PRIVATE -march=armv8.2-a+fp16)
    elseif ("${target_name}" STREQUAL "whisper_vfpv4")
        target_compile_options(${target_name} PRIVATE -mfpu=neon-vfpv4)
    endif ()

    # NOTE: If you want to debug the native code, you can uncomment if and endif
    # if (NOT "${CMAKE_BUILD_TYPE}" STREQUAL "Debug")

    target_compile_options(${target_name} PRIVATE -O3 -DNDEBUG -pthread)
    target_compile_options(${target_name} PRIVATE -fvisibility=hidden -fvisibility-inlines-hidden)
    target_compile_options(${target_name} PRIVATE -ffunction-sections -fdata-sections)

    target_link_options(${target_name} PRIVATE -Wl,--gc-sections)
    target_link_options(${target_name} PRIVATE -Wl,--exclude-libs,ALL)
    # NOTE(review): -flto is passed at link time only; the sources are not
    # compiled with -flto - confirm whether LTO is actually intended here.
    target_link_options(${target_name} PRIVATE -flto)

    # endif ()
endfunction()

build_library("whisper") # Default target

# Quoted so that configuring without ANDROID_ABI set does not turn into the
# malformed `if ( STREQUAL ...)`.
if ("${ANDROID_ABI}" STREQUAL "arm64-v8a")
    build_library("whisper_v8fp16_va")
elseif ("${ANDROID_ABI}" STREQUAL "armeabi-v7a")
    build_library("whisper_vfpv4")
endif ()
@@ -0,0 +1,328 @@
1
+ package com.rnwhisper;
2
+
3
+ import androidx.annotation.NonNull;
4
+ import android.util.Log;
5
+ import android.os.Build;
6
+ import android.os.Handler;
7
+ import android.os.AsyncTask;
8
+ import android.media.AudioRecord;
9
+
10
+ import com.facebook.react.bridge.Promise;
11
+ import com.facebook.react.bridge.ReactApplicationContext;
12
+ import com.facebook.react.bridge.ReactMethod;
13
+ import com.facebook.react.bridge.LifecycleEventListener;
14
+ import com.facebook.react.bridge.ReadableMap;
15
+ import com.facebook.react.bridge.WritableMap;
16
+
17
+ import java.util.HashMap;
18
+ import java.util.Random;
19
+ import java.io.File;
20
+ import java.io.FileInputStream;
21
+ import java.io.PushbackInputStream;
22
+
23
+ public class RNWhisper implements LifecycleEventListener {
24
+ public static final String NAME = "RNWhisper";
25
+
26
+ private ReactApplicationContext reactContext;
27
+ private Downloader downloader;
28
+
29
+ public RNWhisper(ReactApplicationContext reactContext) {
30
+ reactContext.addLifecycleEventListener(this);
31
+ this.reactContext = reactContext;
32
+ this.downloader = new Downloader(reactContext);
33
+ }
34
+
35
+ public HashMap<String, Object> getTypedExportedConstants() {
36
+ HashMap<String, Object> constants = new HashMap<>();
37
+
38
+ // iOS only constants, put for passing type checks
39
+ constants.put("useCoreML", false);
40
+ constants.put("coreMLAllowFallback", false);
41
+
42
+ return constants;
43
+ }
44
+
45
+ private HashMap<AsyncTask, String> tasks = new HashMap<>();
46
+
47
+ private HashMap<Integer, WhisperContext> contexts = new HashMap<>();
48
+
49
+ private int getResourceIdentifier(String filePath) {
50
+ int identifier = reactContext.getResources().getIdentifier(
51
+ filePath,
52
+ "drawable",
53
+ reactContext.getPackageName()
54
+ );
55
+ if (identifier == 0) {
56
+ identifier = reactContext.getResources().getIdentifier(
57
+ filePath,
58
+ "raw",
59
+ reactContext.getPackageName()
60
+ );
61
+ }
62
+ return identifier;
63
+ }
64
+
65
+ public void initContext(final ReadableMap options, final Promise promise) {
66
+ AsyncTask task = new AsyncTask<Void, Void, Integer>() {
67
+ private Exception exception;
68
+
69
+ @Override
70
+ protected Integer doInBackground(Void... voids) {
71
+ try {
72
+ String modelPath = options.getString("filePath");
73
+ boolean isBundleAsset = options.getBoolean("isBundleAsset");
74
+
75
+ String modelFilePath = modelPath;
76
+ if (!isBundleAsset && (modelPath.startsWith("http://") || modelPath.startsWith("https://"))) {
77
+ modelFilePath = downloader.downloadFile(modelPath);
78
+ }
79
+
80
+ long context;
81
+ int resId = getResourceIdentifier(modelFilePath);
82
+ if (resId > 0) {
83
+ context = WhisperContext.initContextWithInputStream(
84
+ new PushbackInputStream(reactContext.getResources().openRawResource(resId))
85
+ );
86
+ } else if (isBundleAsset) {
87
+ context = WhisperContext.initContextWithAsset(reactContext.getAssets(), modelFilePath);
88
+ } else {
89
+ context = WhisperContext.initContext(modelFilePath);
90
+ }
91
+ if (context == 0) {
92
+ throw new Exception("Failed to initialize context");
93
+ }
94
+ int id = Math.abs(new Random().nextInt());
95
+ WhisperContext whisperContext = new WhisperContext(id, reactContext, context);
96
+ contexts.put(id, whisperContext);
97
+ return id;
98
+ } catch (Exception e) {
99
+ exception = e;
100
+ return null;
101
+ }
102
+ }
103
+
104
+ @Override
105
+ protected void onPostExecute(Integer id) {
106
+ if (exception != null) {
107
+ promise.reject(exception);
108
+ return;
109
+ }
110
+ promise.resolve(id);
111
+ tasks.remove(this);
112
+ }
113
+ }.execute();
114
+ tasks.put(task, "initContext");
115
+ }
116
+
117
+ public void transcribeFile(double id, double jobId, String filePath, ReadableMap options, Promise promise) {
118
+ final WhisperContext context = contexts.get((int) id);
119
+ if (context == null) {
120
+ promise.reject("Context not found");
121
+ return;
122
+ }
123
+ if (context.isCapturing()) {
124
+ promise.reject("The context is in realtime transcribe mode");
125
+ return;
126
+ }
127
+ if (context.isTranscribing()) {
128
+ promise.reject("Context is already transcribing");
129
+ return;
130
+ }
131
+ AsyncTask task = new AsyncTask<Void, Void, WritableMap>() {
132
+ private Exception exception;
133
+
134
+ @Override
135
+ protected WritableMap doInBackground(Void... voids) {
136
+ try {
137
+ String waveFilePath = filePath;
138
+
139
+ if (filePath.startsWith("http://") || filePath.startsWith("https://")) {
140
+ waveFilePath = downloader.downloadFile(filePath);
141
+ }
142
+
143
+ int resId = getResourceIdentifier(waveFilePath);
144
+ if (resId > 0) {
145
+ return context.transcribeInputStream(
146
+ (int) jobId,
147
+ reactContext.getResources().openRawResource(resId),
148
+ options
149
+ );
150
+ }
151
+
152
+ return context.transcribeInputStream(
153
+ (int) jobId,
154
+ new FileInputStream(new File(waveFilePath)),
155
+ options
156
+ );
157
+ } catch (Exception e) {
158
+ exception = e;
159
+ return null;
160
+ }
161
+ }
162
+
163
+ @Override
164
+ protected void onPostExecute(WritableMap data) {
165
+ if (exception != null) {
166
+ promise.reject(exception);
167
+ return;
168
+ }
169
+ promise.resolve(data);
170
+ tasks.remove(this);
171
+ }
172
+ }.execute();
173
+ tasks.put(task, "transcribeFile-" + id);
174
+ }
175
+
176
+ public void startRealtimeTranscribe(double id, double jobId, ReadableMap options, Promise promise) {
177
+ final WhisperContext context = contexts.get((int) id);
178
+ if (context == null) {
179
+ promise.reject("Context not found");
180
+ return;
181
+ }
182
+ if (context.isCapturing()) {
183
+ promise.reject("Context is already in capturing");
184
+ return;
185
+ }
186
+ int state = context.startRealtimeTranscribe((int) jobId, options);
187
+ if (state == AudioRecord.STATE_INITIALIZED) {
188
+ promise.resolve(null);
189
+ return;
190
+ }
191
+ promise.reject("Failed to start realtime transcribe. State: " + state);
192
+ }
193
+
194
+ public void abortTranscribe(double id, double jobId, Promise promise) {
195
+ WhisperContext context = contexts.get((int) id);
196
+ if (context == null) {
197
+ promise.reject("Context not found");
198
+ return;
199
+ }
200
+ AsyncTask task = new AsyncTask<Void, Void, Void>() {
201
+ private Exception exception;
202
+
203
+ @Override
204
+ protected Void doInBackground(Void... voids) {
205
+ try {
206
+ context.stopTranscribe((int) jobId);
207
+ AsyncTask completionTask = null;
208
+ for (AsyncTask task : tasks.keySet()) {
209
+ if (tasks.get(task).equals("transcribeFile-" + id)) {
210
+ task.get();
211
+ break;
212
+ }
213
+ }
214
+ } catch (Exception e) {
215
+ exception = e;
216
+ }
217
+ return null;
218
+ }
219
+
220
+ @Override
221
+ protected void onPostExecute(Void result) {
222
+ if (exception != null) {
223
+ promise.reject(exception);
224
+ return;
225
+ }
226
+ promise.resolve(null);
227
+ tasks.remove(this);
228
+ }
229
+ }.execute();
230
+ tasks.put(task, "abortTranscribe-" + id);
231
+ }
232
+
233
+ public void releaseContext(double id, Promise promise) {
234
+ final int contextId = (int) id;
235
+ AsyncTask task = new AsyncTask<Void, Void, Void>() {
236
+ private Exception exception;
237
+
238
+ @Override
239
+ protected Void doInBackground(Void... voids) {
240
+ try {
241
+ WhisperContext context = contexts.get(contextId);
242
+ if (context == null) {
243
+ throw new Exception("Context " + id + " not found");
244
+ }
245
+ context.stopCurrentTranscribe();
246
+ AsyncTask completionTask = null;
247
+ for (AsyncTask task : tasks.keySet()) {
248
+ if (tasks.get(task).equals("transcribeFile-" + contextId)) {
249
+ task.get();
250
+ break;
251
+ }
252
+ }
253
+ context.release();
254
+ contexts.remove(contextId);
255
+ } catch (Exception e) {
256
+ exception = e;
257
+ }
258
+ return null;
259
+ }
260
+
261
+ @Override
262
+ protected void onPostExecute(Void result) {
263
+ if (exception != null) {
264
+ promise.reject(exception);
265
+ return;
266
+ }
267
+ promise.resolve(null);
268
+ tasks.remove(this);
269
+ }
270
+ }.execute();
271
+ tasks.put(task, "releaseContext-" + id);
272
+ }
273
+
274
+ public void releaseAllContexts(Promise promise) {
275
+ AsyncTask task = new AsyncTask<Void, Void, Void>() {
276
+ private Exception exception;
277
+
278
+ @Override
279
+ protected Void doInBackground(Void... voids) {
280
+ try {
281
+ onHostDestroy();
282
+ } catch (Exception e) {
283
+ exception = e;
284
+ }
285
+ return null;
286
+ }
287
+
288
+ @Override
289
+ protected void onPostExecute(Void result) {
290
+ if (exception != null) {
291
+ promise.reject(exception);
292
+ return;
293
+ }
294
+ promise.resolve(null);
295
+ tasks.remove(this);
296
+ }
297
+ }.execute();
298
+ tasks.put(task, "releaseAllContexts");
299
+ }
300
+
301
+ @Override
302
+ public void onHostResume() {
303
+ }
304
+
305
+ @Override
306
+ public void onHostPause() {
307
+ }
308
+
309
+ @Override
310
+ public void onHostDestroy() {
311
+ for (WhisperContext context : contexts.values()) {
312
+ context.stopCurrentTranscribe();
313
+ }
314
+ for (AsyncTask task : tasks.keySet()) {
315
+ try {
316
+ task.get();
317
+ } catch (Exception e) {
318
+ Log.e(NAME, "Failed to wait for task", e);
319
+ }
320
+ }
321
+ for (WhisperContext context : contexts.values()) {
322
+ context.release();
323
+ }
324
+ WhisperContext.abortAllTranscribe(); // graceful abort
325
+ contexts.clear();
326
+ downloader.clearCache();
327
+ }
328
+ }
@@ -61,6 +61,7 @@ public class WhisperContext {
61
61
  private boolean isCapturing = false;
62
62
  private boolean isStoppedByAction = false;
63
63
  private boolean isTranscribing = false;
64
+ private Thread rootFullHandler = null;
64
65
  private Thread fullHandler = null;
65
66
 
66
67
  public WhisperContext(int id, ReactApplicationContext reactContext, long context) {
@@ -81,6 +82,7 @@ public class WhisperContext {
81
82
  isCapturing = false;
82
83
  isStoppedByAction = false;
83
84
  isTranscribing = false;
85
+ rootFullHandler = null;
84
86
  fullHandler = null;
85
87
  }
86
88
 
@@ -117,7 +119,7 @@ public class WhisperContext {
117
119
  isCapturing = true;
118
120
  recorder.startRecording();
119
121
 
120
- new Thread(new Runnable() {
122
+ rootFullHandler = new Thread(new Runnable() {
121
123
  @Override
122
124
  public void run() {
123
125
  try {
@@ -195,7 +197,8 @@ public class WhisperContext {
195
197
  recorder = null;
196
198
  }
197
199
  }
198
- }).start();
200
+ });
201
+ rootFullHandler.start();
199
202
  return state;
200
203
  }
201
204
 
@@ -402,6 +405,14 @@ public class WhisperContext {
402
405
  abortTranscribe(jobId);
403
406
  isCapturing = false;
404
407
  isStoppedByAction = true;
408
+ if (rootFullHandler != null) {
409
+ try {
410
+ rootFullHandler.join();
411
+ } catch (Exception e) {
412
+ Log.e(NAME, "Error joining rootFullHandler: " + e.getMessage());
413
+ }
414
+ rootFullHandler = null;
415
+ }
405
416
  }
406
417
 
407
418
  public void stopCurrentTranscribe() {