whisper.rn 0.4.0-rc.2 → 0.4.0-rc.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40)
  1. package/android/src/main/CMakeLists.txt +2 -0
  2. package/android/src/main/java/com/rnwhisper/RNWhisper.java +6 -1
  3. package/android/src/main/java/com/rnwhisper/WhisperContext.java +29 -15
  4. package/android/src/main/jni.cpp +6 -2
  5. package/cpp/ggml-alloc.c +413 -280
  6. package/cpp/ggml-alloc.h +67 -8
  7. package/cpp/ggml-backend-impl.h +87 -0
  8. package/cpp/ggml-backend.c +950 -0
  9. package/cpp/ggml-backend.h +136 -0
  10. package/cpp/ggml-impl.h +243 -0
  11. package/cpp/{ggml-metal.metal → ggml-metal-whisper.metal} +591 -121
  12. package/cpp/ggml-metal.h +21 -0
  13. package/cpp/ggml-metal.m +623 -234
  14. package/cpp/ggml-quants.c +7377 -0
  15. package/cpp/ggml-quants.h +224 -0
  16. package/cpp/ggml.c +3773 -4455
  17. package/cpp/ggml.h +279 -146
  18. package/cpp/whisper.cpp +182 -103
  19. package/cpp/whisper.h +48 -11
  20. package/ios/RNWhisper.mm +8 -2
  21. package/ios/RNWhisperContext.h +6 -2
  22. package/ios/RNWhisperContext.mm +97 -26
  23. package/jest/mock.js +1 -1
  24. package/lib/commonjs/NativeRNWhisper.js.map +1 -1
  25. package/lib/commonjs/index.js +28 -9
  26. package/lib/commonjs/index.js.map +1 -1
  27. package/lib/commonjs/version.json +1 -1
  28. package/lib/module/NativeRNWhisper.js.map +1 -1
  29. package/lib/module/index.js +28 -9
  30. package/lib/module/index.js.map +1 -1
  31. package/lib/module/version.json +1 -1
  32. package/lib/typescript/NativeRNWhisper.d.ts +7 -1
  33. package/lib/typescript/NativeRNWhisper.d.ts.map +1 -1
  34. package/lib/typescript/index.d.ts +8 -3
  35. package/lib/typescript/index.d.ts.map +1 -1
  36. package/package.json +1 -1
  37. package/src/NativeRNWhisper.ts +8 -1
  38. package/src/index.ts +30 -18
  39. package/src/version.json +1 -1
  40. package/whisper-rn.podspec +1 -2
@@ -9,6 +9,8 @@ set(
9
9
  SOURCE_FILES
10
10
  ${RNWHISPER_LIB_DIR}/ggml.c
11
11
  ${RNWHISPER_LIB_DIR}/ggml-alloc.c
12
+ ${RNWHISPER_LIB_DIR}/ggml-backend.c
13
+ ${RNWHISPER_LIB_DIR}/ggml-quants.c
12
14
  ${RNWHISPER_LIB_DIR}/whisper.cpp
13
15
  ${RNWHISPER_LIB_DIR}/rn-whisper.cpp
14
16
  ${CMAKE_SOURCE_DIR}/jni.cpp
@@ -13,6 +13,7 @@ import com.facebook.react.bridge.ReactMethod;
13
13
  import com.facebook.react.bridge.LifecycleEventListener;
14
14
  import com.facebook.react.bridge.ReadableMap;
15
15
  import com.facebook.react.bridge.WritableMap;
16
+ import com.facebook.react.bridge.Arguments;
16
17
 
17
18
  import java.util.HashMap;
18
19
  import java.util.Random;
@@ -107,7 +108,11 @@ public class RNWhisper implements LifecycleEventListener {
107
108
  promise.reject(exception);
108
109
  return;
109
110
  }
110
- promise.resolve(id);
111
+ WritableMap result = Arguments.createMap();
112
+ result.putInt("contextId", id);
113
+ result.putBoolean("gpu", false);
114
+ result.putString("reasonNoGPU", "Currently not supported");
115
+ promise.resolve(result);
111
116
  tasks.remove(this);
112
117
  }
113
118
  }.execute();
@@ -82,8 +82,9 @@ public class WhisperContext {
82
82
  private boolean vad(ReadableMap options, short[] shortBuffer, int nSamples, int n) {
83
83
  boolean isSpeech = true;
84
84
  if (!isTranscribing && options.hasKey("useVad") && options.getBoolean("useVad")) {
85
- int vadSec = options.hasKey("vadMs") ? options.getInt("vadMs") / 1000 : 2;
86
- int sampleSize = vadSec * SAMPLE_RATE;
85
+ int vadMs = options.hasKey("vadMs") ? options.getInt("vadMs") : 2000;
86
+ if (vadMs < 2000) vadMs = 2000;
87
+ int sampleSize = (int) (SAMPLE_RATE * vadMs / 1000);
87
88
  if (nSamples + n > sampleSize) {
88
89
  int start = nSamples + n - sampleSize;
89
90
  float[] audioData = new float[sampleSize];
@@ -100,6 +101,21 @@ public class WhisperContext {
100
101
  return isSpeech;
101
102
  }
102
103
 
104
+ private void finishRealtimeTranscribe(ReadableMap options, WritableMap result) { // Optionally saves the captured audio, then emits the realtime-transcribe end event carrying `result`.
105
+ String audioOutputPath = options.hasKey("audioOutputPath") ? options.getString("audioOutputPath") : null;
106
+ if (audioOutputPath != null) { // Only write a wav file when the caller asked for one.
107
+ // TODO: Append in real time so we don't need to keep all slices & also reduce memory usage
108
+ Log.d(NAME, "Begin saving wav file to " + audioOutputPath);
109
+ try {
110
+ AudioUtils.saveWavFile(AudioUtils.concatShortBuffers(shortBufferSlices), audioOutputPath);
111
+ } catch (IOException e) { // Best effort: a failed save is logged and the end event is still emitted.
112
+ Log.e(NAME, "Error saving wav file: " + e.getMessage());
113
+ }
114
+ }
115
+
116
+ emitTranscribeEvent("@RNWhisper_onRealtimeTranscribeEnd", result); // Forward the caller's payload instead of discarding it for an empty map.
117
+ }
118
+
103
119
  public int startRealtimeTranscribe(int jobId, ReadableMap options) {
104
120
  if (isCapturing || isTranscribing) {
105
121
  return -100;
@@ -131,7 +147,7 @@ public class WhisperContext {
131
147
  shortBufferSlices.add(new short[audioSliceSec * SAMPLE_RATE]);
132
148
  sliceNSamples = new ArrayList<Integer>();
133
149
  sliceNSamples.add(0);
134
-
150
+
135
151
  isCapturing = true;
136
152
  recorder.startRecording();
137
153
 
@@ -159,12 +175,12 @@ public class WhisperContext {
159
175
  nSamples == nSamplesTranscribing &&
160
176
  sliceIndex == transcribeSliceIndex
161
177
  ) {
162
- emitTranscribeEvent("@RNWhisper_onRealtimeTranscribeEnd", Arguments.createMap());
178
+ finishRealtimeTranscribe(options, Arguments.createMap());
163
179
  } else if (!isTranscribing) {
164
180
  short[] shortBuffer = shortBufferSlices.get(sliceIndex);
165
181
  boolean isSpeech = vad(options, shortBuffer, nSamples, 0);
166
182
  if (!isSpeech) {
167
- emitTranscribeEvent("@RNWhisper_onRealtimeTranscribeEnd", Arguments.createMap());
183
+ finishRealtimeTranscribe(options, Arguments.createMap());
168
184
  break;
169
185
  }
170
186
  isTranscribing = true;
@@ -210,11 +226,9 @@ public class WhisperContext {
210
226
  Log.e(NAME, "Error transcribing realtime: " + e.getMessage());
211
227
  }
212
228
  }
213
- // TODO: Append in real time so we don't need to keep all slices & also reduce memory usage
214
- Log.d(NAME, "Begin saving wav file to " + audioOutputPath);
215
- AudioUtils.saveWavFile(AudioUtils.concatShortBuffers(shortBufferSlices), audioOutputPath);
229
+
216
230
  if (!isTranscribing) {
217
- emitTranscribeEvent("@RNWhisper_onRealtimeTranscribeEnd", Arguments.createMap());
231
+ finishRealtimeTranscribe(options, Arguments.createMap());
218
232
  }
219
233
  if (fullHandler != null) {
220
234
  fullHandler.join(); // Wait for full transcribe to finish
@@ -264,7 +278,7 @@ public class WhisperContext {
264
278
 
265
279
  if (code == 0) {
266
280
  payload.putMap("data", getTextSegments(0, getTextSegmentCount(context)));
267
- } else {
281
+ } else if (code != -999) { // Not aborted
268
282
  payload.putString("error", "Transcribe failed with code " + code);
269
283
  }
270
284
 
@@ -283,12 +297,12 @@ public class WhisperContext {
283
297
  nSamplesTranscribing = 0;
284
298
  }
285
299
 
286
- boolean continueNeeded = !isCapturing && nSamplesTranscribing != nSamplesOfIndex;
300
+ boolean continueNeeded = !isCapturing && nSamplesTranscribing != nSamplesOfIndex && code != -999;
287
301
 
288
302
  if (isStopped && !continueNeeded) {
289
303
  payload.putBoolean("isCapturing", false);
290
304
  payload.putBoolean("isStoppedByAction", isStoppedByAction);
291
- emitTranscribeEvent("@RNWhisper_onRealtimeTranscribeEnd", payload);
305
+ finishRealtimeTranscribe(options, payload);
292
306
  } else if (code == 0) {
293
307
  payload.putBoolean("isCapturing", true);
294
308
  emitTranscribeEvent("@RNWhisper_onRealtimeTranscribe", payload);
@@ -372,7 +386,7 @@ public class WhisperContext {
372
386
  int code = full(jobId, options, audioData, audioData.length);
373
387
  isTranscribing = false;
374
388
  this.jobId = -1;
375
- if (code != 0) {
389
+ if (code != 0 && code != -999) { // -999 is the code the native fullTranscribe returns for an aborted job; do not treat it as a failure.
376
390
  throw new Exception("Failed to transcribe the file. Code: " + code);
377
391
  }
378
392
  WritableMap result = getTextSegments(0, getTextSegmentCount(context));
@@ -401,7 +415,7 @@ public class WhisperContext {
401
415
  options.hasKey("maxLen") ? options.getInt("maxLen") : -1,
402
416
  // jboolean token_timestamps,
403
417
  options.hasKey("tokenTimestamps") ? options.getBoolean("tokenTimestamps") : false,
404
-
418
+
405
419
  // jint offset,
406
420
  options.hasKey("offset") ? options.getInt("offset") : -1,
407
421
  // jint duration,
@@ -577,4 +591,4 @@ public class WhisperContext {
577
591
  protected static native int getTextSegmentT0(long context, int index);
578
592
  protected static native int getTextSegmentT1(long context, int index);
579
593
  protected static native void freeContext(long contextPtr);
580
- }
594
+ }
@@ -297,16 +297,17 @@ Java_com_rnwhisper_WhisperContext_fullTranscribe(
297
297
  }
298
298
 
299
299
  // abort handlers
300
+ bool* abort_ptr = rn_whisper_assign_abort_map(job_id);
300
301
  params.encoder_begin_callback = [](struct whisper_context * /*ctx*/, struct whisper_state * /*state*/, void * user_data) {
301
302
  bool is_aborted = *(bool*)user_data;
302
303
  return !is_aborted;
303
304
  };
304
- params.encoder_begin_callback_user_data = rn_whisper_assign_abort_map(job_id);
305
+ params.encoder_begin_callback_user_data = abort_ptr;
305
306
  params.abort_callback = [](void * user_data) {
306
307
  bool is_aborted = *(bool*)user_data;
307
308
  return is_aborted;
308
309
  };
309
- params.abort_callback_user_data = rn_whisper_assign_abort_map(job_id);
310
+ params.abort_callback_user_data = abort_ptr;
310
311
 
311
312
  if (callback_instance != nullptr) {
312
313
  callback_context *cb_ctx = new callback_context;
@@ -344,6 +345,9 @@ Java_com_rnwhisper_WhisperContext_fullTranscribe(
344
345
  }
345
346
  env->ReleaseFloatArrayElements(audio_data, audio_data_arr, JNI_ABORT);
346
347
  env->ReleaseStringUTFChars(language, language_chars);
348
+ if (rn_whisper_transcribe_is_aborted(job_id)) {
349
+ code = -999;
350
+ }
347
351
  rn_whisper_remove_abort_map(job_id);
348
352
  return code;
349
353
  }