whisper.rn 0.3.0 → 0.3.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -23,6 +23,8 @@ npm install whisper.rn
23
23
 
24
24
  For iOS, please re-run `npx pod-install` again.
25
25
 
26
+ If you want to use the `medium` or `large` model, enabling the [Extended Virtual Addressing](https://developer.apple.com/documentation/bundleresources/entitlements/com_apple_developer_kernel_extended-virtual-addressing) capability in your iOS project is recommended.
27
+
26
28
  For Android, it's recommended to use `ndkVersion = "24.0.8215888"` (or above) in your root project build configuration for Apple Silicon Macs. Otherwise please follow this troubleshooting [issue](./TROUBLESHOOTING.md#android-got-build-error-unknown-host-cpu-architecture-arm64-on-apple-silicon-macs).
27
29
 
28
30
  For Expo, you will need to prebuild the project before using it. See [Expo guide](https://docs.expo.io/guides/using-libraries/#using-a-library-in-a-expo-project) for more details.
@@ -45,6 +47,9 @@ Add the following line to ```android/app/src/main/AndroidManifest.xml```
45
47
  ```xml
46
48
  <uses-permission android:name="android.permission.RECORD_AUDIO" />
47
49
  ```
50
+ ## Tips & Tricks
51
+
52
+ The [Tips & Tricks](docs/TIPS.md) document is a collection of tips and tricks for using `whisper.rn`.
48
53
 
49
54
  ## Usage
50
55
 
@@ -121,7 +126,9 @@ module.exports = {
121
126
  }
122
127
  ```
123
128
 
124
- Please note that it will significantly increase the size of the app in release mode.
129
+ Please note that:
130
+ - It will significantly increase the size of the app in release mode.
131
+ - The RN packager does not allow files larger than 2GB, so the original f16 `large` model (2.9GB) cannot be used; use a quantized model instead.
125
132
 
126
133
  ## Core ML support
127
134
 
@@ -191,7 +198,7 @@ See the [contributing guide](CONTRIBUTING.md) to learn how to contribute to the
191
198
 
192
199
  ## Troubleshooting
193
200
 
194
- See the [troubleshooting](TROUBLESHOOTING.md) if you encounter any problem while using `whisper.rn`.
201
+ See the [troubleshooting](docs/TROUBLESHOOTING.md) guide if you encounter any problems while using `whisper.rn`.
195
202
 
196
203
  ## License
197
204
 
@@ -71,7 +71,7 @@ public class WhisperContext {
71
71
  bufferSize = AudioRecord.getMinBufferSize(SAMPLE_RATE, CHANNEL_CONFIG, AUDIO_FORMAT);
72
72
  }
73
73
 
74
- private void resetRealtimeTranscribe() {
74
+ private void rewind() {
75
75
  shortBufferSlices = null;
76
76
  sliceNSamples = null;
77
77
  sliceIndex = 0;
@@ -97,7 +97,7 @@ public class WhisperContext {
97
97
  return state;
98
98
  }
99
99
 
100
- resetRealtimeTranscribe();
100
+ rewind();
101
101
 
102
102
  this.jobId = jobId;
103
103
 
@@ -269,7 +269,7 @@ public class WhisperContext {
269
269
  fullTranscribeSamples(options, true);
270
270
  } else if (isStopped) {
271
271
  // No next, cleanup
272
- resetRealtimeTranscribe();
272
+ rewind();
273
273
  }
274
274
  isTranscribing = false;
275
275
  }
@@ -282,7 +282,32 @@ public class WhisperContext {
282
282
  eventEmitter.emit(eventName, event);
283
283
  }
284
284
 
285
+ private void emitProgress(int progress) {
286
+ WritableMap event = Arguments.createMap();
287
+ event.putInt("contextId", WhisperContext.this.id);
288
+ event.putInt("jobId", jobId);
289
+ event.putInt("progress", progress);
290
+ eventEmitter.emit("@RNWhisper_onTranscribeProgress", event);
291
+ }
292
+
293
+ private static class ProgressCallback {
294
+ WhisperContext context;
295
+
296
+ public ProgressCallback(WhisperContext context) {
297
+ this.context = context;
298
+ }
299
+
300
+ void onProgress(int progress) {
301
+ context.emitProgress(progress);
302
+ }
303
+ }
304
+
285
305
  public WritableMap transcribeInputStream(int jobId, InputStream inputStream, ReadableMap options) throws IOException, Exception {
306
+ if (isCapturing || isTranscribing) {
307
+ throw new Exception("Context is already in capturing or transcribing");
308
+ }
309
+ rewind();
310
+
286
311
  this.jobId = jobId;
287
312
  isTranscribing = true;
288
313
  float[] audioData = decodeWaveFile(inputStream);
@@ -292,7 +317,9 @@ public class WhisperContext {
292
317
  if (code != 0) {
293
318
  throw new Exception("Failed to transcribe the file. Code: " + code);
294
319
  }
295
- return getTextSegments();
320
+ WritableMap result = getTextSegments();
321
+ result.putBoolean("isAborted", isStoppedByAction);
322
+ return result;
296
323
  }
297
324
 
298
325
  private int full(int jobId, ReadableMap options, float[] audioData, int audioDataLen) {
@@ -334,7 +361,9 @@ public class WhisperContext {
334
361
  // jstring language,
335
362
  options.hasKey("language") ? options.getString("language") : "auto",
336
363
  // jstring prompt
337
- options.hasKey("prompt") ? options.getString("prompt") : null
364
+ options.hasKey("prompt") ? options.getString("prompt") : null,
365
+ // ProgressCallback progressCallback
366
+ options.hasKey("onProgress") && options.getBoolean("onProgress") ? new ProgressCallback(this) : null
338
367
  );
339
368
  }
340
369
 
@@ -469,6 +498,7 @@ public class WhisperContext {
469
498
  }
470
499
  }
471
500
 
501
+
472
502
  protected static native long initContext(String modelPath);
473
503
  protected static native long initContextWithAsset(AssetManager assetManager, String modelPath);
474
504
  protected static native long initContextWithInputStream(PushbackInputStream inputStream);
@@ -491,7 +521,8 @@ public class WhisperContext {
491
521
  boolean speed_up,
492
522
  boolean translate,
493
523
  String language,
494
- String prompt
524
+ String prompt,
525
+ ProgressCallback progressCallback
495
526
  );
496
527
  protected static native void abortTranscribe(int jobId);
497
528
  protected static native void abortAllTranscribe();
@@ -184,6 +184,11 @@ Java_com_rnwhisper_WhisperContext_initContextWithInputStream(
184
184
  return reinterpret_cast<jlong>(context);
185
185
  }
186
186
 
187
+ struct progress_callback_context {
188
+ JNIEnv *env;
189
+ jobject progress_callback_instance;
190
+ };
191
+
187
192
  JNIEXPORT jint JNICALL
188
193
  Java_com_rnwhisper_WhisperContext_fullTranscribe(
189
194
  JNIEnv *env,
@@ -206,7 +211,8 @@ Java_com_rnwhisper_WhisperContext_fullTranscribe(
206
211
  jboolean speed_up,
207
212
  jboolean translate,
208
213
  jstring language,
209
- jstring prompt
214
+ jstring prompt,
215
+ jobject progress_callback_instance
210
216
  ) {
211
217
  UNUSED(thiz);
212
218
  struct whisper_context *context = reinterpret_cast<struct whisper_context *>(context_ptr);
@@ -274,6 +280,21 @@ Java_com_rnwhisper_WhisperContext_fullTranscribe(
274
280
  };
275
281
  params.encoder_begin_callback_user_data = rn_whisper_assign_abort_map(job_id);
276
282
 
283
+ if (progress_callback_instance != nullptr) {
284
+ params.progress_callback = [](struct whisper_context * /*ctx*/, struct whisper_state * /*state*/, int progress, void * user_data) {
285
+ progress_callback_context *cb_ctx = (progress_callback_context *)user_data;
286
+ JNIEnv *env = cb_ctx->env;
287
+ jobject progress_callback_instance = cb_ctx->progress_callback_instance;
288
+ jclass progress_callback_class = env->GetObjectClass(progress_callback_instance);
289
+ jmethodID onProgress = env->GetMethodID(progress_callback_class, "onProgress", "(I)V");
290
+ env->CallVoidMethod(progress_callback_instance, onProgress, progress);
291
+ };
292
+ progress_callback_context *cb_ctx = new progress_callback_context;
293
+ cb_ctx->env = env;
294
+ cb_ctx->progress_callback_instance = env->NewGlobalRef(progress_callback_instance);
295
+ params.progress_callback_user_data = cb_ctx;
296
+ }
297
+
277
298
  LOGI("About to reset timings");
278
299
  whisper_reset_timings(context);
279
300
 
@@ -25,6 +25,13 @@ void rn_whisper_abort_transcribe(int job_id) {
25
25
  }
26
26
  }
27
27
 
28
+ bool rn_whisper_transcribe_is_aborted(int job_id) {
29
+ if (abort_map.find(job_id) != abort_map.end()) {
30
+ return abort_map[job_id];
31
+ }
32
+ return false;
33
+ }
34
+
28
35
  void rn_whisper_abort_all_transcribe() {
29
36
  for (auto it = abort_map.begin(); it != abort_map.end(); ++it) {
30
37
  it->second = true;
package/cpp/rn-whisper.h CHANGED
@@ -8,6 +8,7 @@ extern "C" {
8
8
  bool* rn_whisper_assign_abort_map(int job_id);
9
9
  void rn_whisper_remove_abort_map(int job_id);
10
10
  void rn_whisper_abort_transcribe(int job_id);
11
+ bool rn_whisper_transcribe_is_aborted(int job_id);
11
12
  void rn_whisper_abort_all_transcribe();
12
13
 
13
14
  #ifdef __cplusplus