whisper.rn 0.3.7 → 0.3.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (34) hide show
  1. package/README.md +28 -0
  2. package/android/src/main/java/com/rnwhisper/AudioUtils.java +119 -0
  3. package/android/src/main/java/com/rnwhisper/WhisperContext.java +37 -116
  4. package/android/src/main/jni.cpp +23 -12
  5. package/android/src/newarch/java/com/rnwhisper/RNWhisperModule.java +26 -0
  6. package/ios/RNWhisper.mm +81 -22
  7. package/ios/RNWhisper.xcodeproj/project.pbxproj +6 -0
  8. package/ios/RNWhisper.xcodeproj/project.xcworkspace/xcuserdata/jhen.xcuserdatad/UserInterfaceState.xcuserstate +0 -0
  9. package/ios/RNWhisperAudioSessionUtils.h +13 -0
  10. package/ios/RNWhisperAudioSessionUtils.m +85 -0
  11. package/ios/RNWhisperAudioUtils.h +1 -0
  12. package/ios/RNWhisperAudioUtils.m +21 -0
  13. package/ios/RNWhisperContext.h +1 -0
  14. package/ios/RNWhisperContext.mm +56 -28
  15. package/lib/commonjs/AudioSessionIos.js +91 -0
  16. package/lib/commonjs/AudioSessionIos.js.map +1 -0
  17. package/lib/commonjs/NativeRNWhisper.js.map +1 -1
  18. package/lib/commonjs/index.js +82 -14
  19. package/lib/commonjs/index.js.map +1 -1
  20. package/lib/module/AudioSessionIos.js +83 -0
  21. package/lib/module/AudioSessionIos.js.map +1 -0
  22. package/lib/module/NativeRNWhisper.js.map +1 -1
  23. package/lib/module/index.js +77 -14
  24. package/lib/module/index.js.map +1 -1
  25. package/lib/typescript/AudioSessionIos.d.ts +54 -0
  26. package/lib/typescript/AudioSessionIos.d.ts.map +1 -0
  27. package/lib/typescript/NativeRNWhisper.d.ts +8 -0
  28. package/lib/typescript/NativeRNWhisper.d.ts.map +1 -1
  29. package/lib/typescript/index.d.ts +37 -2
  30. package/lib/typescript/index.d.ts.map +1 -1
  31. package/package.json +1 -1
  32. package/src/AudioSessionIos.ts +90 -0
  33. package/src/NativeRNWhisper.ts +11 -1
  34. package/src/index.ts +153 -26
package/README.md CHANGED
@@ -99,6 +99,34 @@ subscribe(evt => {
99
99
  })
100
100
  ```
101
101
 
102
+ On iOS, you may need to change the Audio Session so that it can be used alongside other audio playback, or to optimize the recording quality. So we have provided AudioSession utilities for you:
103
+
104
+ Option 1 - Use options in transcribeRealtime:
105
+ ```js
106
+ import { AudioSessionIos } from 'whisper.rn'
107
+
108
+ const { stop, subscribe } = await whisperContext.transcribeRealtime({
109
+ audioSessionOnStartIos: {
110
+ category: AudioSessionIos.Category.PlayAndRecord,
111
+ options: [AudioSessionIos.CategoryOption.MixWithOthers],
112
+ mode: AudioSessionIos.Mode.Default,
113
+ },
114
+ audioSessionOnStopIos: 'restore', // Or an AudioSessionSettingIos
115
+ })
116
+ ```
117
+
118
+ Option 2 - Manage the Audio Session anywhere:
119
+ ```js
120
+ import { AudioSessionIos } from 'whisper.rn'
121
+
122
+ await AudioSessionIos.setCategory(
123
+ AudioSessionIos.Category.PlayAndRecord, [AudioSessionIos.CategoryOption.MixWithOthers],
124
+ )
125
+ await AudioSessionIos.setMode(AudioSessionIos.Mode.Default)
126
+ await AudioSessionIos.setActive(true)
127
+ // Then you can start recording
128
+ ```
129
+
102
130
  In Android, you may need to request the microphone permission by [`PermissionAndroid`](https://reactnative.dev/docs/permissionsandroid).
103
131
 
104
132
  Please visit the [Documentation](docs/) for more details.
@@ -0,0 +1,119 @@
1
+ package com.rnwhisper;
2
+
3
+ import android.util.Log;
4
+
5
+ import java.util.ArrayList;
6
+ import java.lang.StringBuilder;
7
+ import java.io.IOException;
8
+ import java.io.FileReader;
9
+ import java.io.ByteArrayOutputStream;
10
+ import java.io.File;
11
+ import java.io.FileOutputStream;
12
+ import java.io.DataOutputStream;
13
+ import java.io.IOException;
14
+ import java.io.InputStream;
15
+ import java.nio.ByteBuffer;
16
+ import java.nio.ByteOrder;
17
+ import java.nio.ShortBuffer;
18
+
19
+ public class AudioUtils {
20
+ private static final String NAME = "RNWhisperAudioUtils";
21
+
22
+ private static final int SAMPLE_RATE = 16000;
23
+
24
+ private static byte[] shortToByte(short[] shortInts) {
25
+ int j = 0;
26
+ int length = shortInts.length;
27
+ byte[] byteData = new byte[length * 2];
28
+ for (int i = 0; i < length; i++) {
29
+ byteData[j++] = (byte) (shortInts[i] >>> 8);
30
+ byteData[j++] = (byte) (shortInts[i] >>> 0);
31
+ }
32
+ return byteData;
33
+ }
34
+
35
+ public static byte[] concatShortBuffers(ArrayList<short[]> buffers) {
36
+ int totalLength = 0;
37
+ for (int i = 0; i < buffers.size(); i++) {
38
+ totalLength += buffers.get(i).length;
39
+ }
40
+ byte[] result = new byte[totalLength * 2];
41
+ int offset = 0;
42
+ for (int i = 0; i < buffers.size(); i++) {
43
+ byte[] bytes = shortToByte(buffers.get(i));
44
+ System.arraycopy(bytes, 0, result, offset, bytes.length);
45
+ offset += bytes.length;
46
+ }
47
+
48
+ return result;
49
+ }
50
+
51
+ private static byte[] removeTrailingZeros(byte[] audioData) {
52
+ int i = audioData.length - 1;
53
+ while (i >= 0 && audioData[i] == 0) {
54
+ --i;
55
+ }
56
+ byte[] newData = new byte[i + 1];
57
+ System.arraycopy(audioData, 0, newData, 0, i + 1);
58
+ return newData;
59
+ }
60
+
61
+ public static void saveWavFile(byte[] rawData, String audioOutputFile) throws IOException {
62
+ Log.d(NAME, "call saveWavFile");
63
+ rawData = removeTrailingZeros(rawData);
64
+ DataOutputStream output = null;
65
+ try {
66
+ output = new DataOutputStream(new FileOutputStream(audioOutputFile));
67
+ // WAVE header
68
+ // see http://ccrma.stanford.edu/courses/422/projects/WaveFormat/
69
+ output.writeBytes("RIFF"); // chunk id
70
+ output.writeInt(Integer.reverseBytes(36 + rawData.length)); // chunk size
71
+ output.writeBytes("WAVE"); // format
72
+ output.writeBytes("fmt "); // subchunk 1 id
73
+ output.writeInt(Integer.reverseBytes(16)); // subchunk 1 size
74
+ output.writeShort(Short.reverseBytes((short) 1)); // audio format (1 = PCM)
75
+ output.writeShort(Short.reverseBytes((short) 1)); // number of channels
76
+ output.writeInt(Integer.reverseBytes(SAMPLE_RATE)); // sample rate
77
+ output.writeInt(Integer.reverseBytes(SAMPLE_RATE * 2)); // byte rate
78
+ output.writeShort(Short.reverseBytes((short) 2)); // block align
79
+ output.writeShort(Short.reverseBytes((short) 16)); // bits per sample
80
+ output.writeBytes("data"); // subchunk 2 id
81
+ output.writeInt(Integer.reverseBytes(rawData.length)); // subchunk 2 size
82
+ // Audio data (conversion big endian -> little endian)
83
+ short[] shorts = new short[rawData.length / 2];
84
+ ByteBuffer.wrap(rawData).order(ByteOrder.LITTLE_ENDIAN).asShortBuffer().get(shorts);
85
+ ByteBuffer bytes = ByteBuffer.allocate(shorts.length * 2);
86
+ for (short s : shorts) {
87
+ bytes.putShort(s);
88
+ }
89
+ Log.d(NAME, "writing audio file: " + audioOutputFile);
90
+ output.write(bytes.array());
91
+ } finally {
92
+ if (output != null) {
93
+ output.close();
94
+ }
95
+ }
96
+ }
97
+
98
+ public static float[] decodeWaveFile(InputStream inputStream) throws IOException {
99
+ ByteArrayOutputStream baos = new ByteArrayOutputStream();
100
+ byte[] buffer = new byte[1024];
101
+ int bytesRead;
102
+ while ((bytesRead = inputStream.read(buffer)) != -1) {
103
+ baos.write(buffer, 0, bytesRead);
104
+ }
105
+ ByteBuffer byteBuffer = ByteBuffer.wrap(baos.toByteArray());
106
+ byteBuffer.order(ByteOrder.LITTLE_ENDIAN);
107
+ byteBuffer.position(44);
108
+ ShortBuffer shortBuffer = byteBuffer.asShortBuffer();
109
+ short[] shortArray = new short[shortBuffer.limit()];
110
+ shortBuffer.get(shortArray);
111
+ float[] floatArray = new float[shortArray.length];
112
+ for (int i = 0; i < shortArray.length; i++) {
113
+ floatArray[i] = ((float) shortArray[i]) / 32767.0f;
114
+ floatArray[i] = Math.max(floatArray[i], -1f);
115
+ floatArray[i] = Math.min(floatArray[i], 1f);
116
+ }
117
+ return floatArray;
118
+ }
119
+ }
@@ -14,24 +14,15 @@ import android.media.AudioFormat;
14
14
  import android.media.AudioRecord;
15
15
  import android.media.MediaRecorder.AudioSource;
16
16
 
17
- import java.util.Random;
18
17
  import java.util.ArrayList;
19
18
  import java.lang.StringBuilder;
20
- import java.io.File;
21
19
  import java.io.BufferedReader;
22
20
  import java.io.IOException;
23
21
  import java.io.FileReader;
24
- import java.io.ByteArrayOutputStream;
25
22
  import java.io.File;
26
- import java.io.FileInputStream;
27
- import java.io.FileOutputStream;
28
- import java.io.DataOutputStream;
29
23
  import java.io.IOException;
30
24
  import java.io.InputStream;
31
25
  import java.io.PushbackInputStream;
32
- import java.nio.ByteBuffer;
33
- import java.nio.ByteOrder;
34
- import java.nio.ShortBuffer;
35
26
 
36
27
  public class WhisperContext {
37
28
  public static final String NAME = "RNWhisperContext";
@@ -88,80 +79,6 @@ public class WhisperContext {
88
79
  fullHandler = null;
89
80
  }
90
81
 
91
- public byte[] shortToByte(short[] shortInts) {
92
- int j = 0;
93
- int length = shortInts.length;
94
- byte[] byteData = new byte[length * 2];
95
- for (int i = 0; i < length; i++) {
96
- byteData[j++] = (byte) (shortInts[i] >>> 8);
97
- byteData[j++] = (byte) (shortInts[i] >>> 0);
98
- }
99
- return byteData;
100
- }
101
-
102
- public byte[] concatShortBuffers(ArrayList<short[]> buffers) {
103
- int totalLength = 0;
104
- for (int i = 0; i < buffers.size(); i++) {
105
- totalLength += buffers.get(i).length;
106
- }
107
- byte[] result = new byte[totalLength * 2];
108
- int offset = 0;
109
- for (int i = 0; i < buffers.size(); i++) {
110
- byte[] bytes = shortToByte(buffers.get(i));
111
- System.arraycopy(bytes, 0, result, offset, bytes.length);
112
- offset += bytes.length;
113
- }
114
-
115
- return result;
116
- }
117
-
118
- public byte[] removeTrailingZeros(byte[] audioData) {
119
- int i = audioData.length - 1;
120
- while (i >= 0 && audioData[i] == 0) {
121
- --i;
122
- }
123
- byte[] newData = new byte[i + 1];
124
- System.arraycopy(audioData, 0, newData, 0, i + 1);
125
- return newData;
126
- }
127
-
128
- private void saveWavFile(byte[] rawData, String audioOutputFile) throws IOException {
129
- Log.d(NAME, "call saveWavFile");
130
- rawData = removeTrailingZeros(rawData);
131
- DataOutputStream output = null;
132
- try {
133
- output = new DataOutputStream(new FileOutputStream(audioOutputFile));
134
- // WAVE header
135
- // see http://ccrma.stanford.edu/courses/422/projects/WaveFormat/
136
- output.writeBytes("RIFF"); // chunk id
137
- output.writeInt(Integer.reverseBytes(36 + rawData.length)); // chunk size
138
- output.writeBytes("WAVE"); // format
139
- output.writeBytes("fmt "); // subchunk 1 id
140
- output.writeInt(Integer.reverseBytes(16)); // subchunk 1 size
141
- output.writeShort(Short.reverseBytes((short) 1)); // audio format (1 = PCM)
142
- output.writeShort(Short.reverseBytes((short) 1)); // number of channels
143
- output.writeInt(Integer.reverseBytes(SAMPLE_RATE)); // sample rate
144
- output.writeInt(Integer.reverseBytes(SAMPLE_RATE * 2)); // byte rate
145
- output.writeShort(Short.reverseBytes((short) 2)); // block align
146
- output.writeShort(Short.reverseBytes((short) 16)); // bits per sample
147
- output.writeBytes("data"); // subchunk 2 id
148
- output.writeInt(Integer.reverseBytes(rawData.length)); // subchunk 2 size
149
- // Audio data (conversion big endian -> little endian)
150
- short[] shorts = new short[rawData.length / 2];
151
- ByteBuffer.wrap(rawData).order(ByteOrder.LITTLE_ENDIAN).asShortBuffer().get(shorts);
152
- ByteBuffer bytes = ByteBuffer.allocate(shorts.length * 2);
153
- for (short s : shorts) {
154
- bytes.putShort(s);
155
- }
156
- Log.d(NAME, "writing audio file: " + audioOutputFile);
157
- output.write(bytes.array());
158
- } finally {
159
- if (output != null) {
160
- output.close();
161
- }
162
- }
163
- }
164
-
165
82
  private boolean vad(ReadableMap options, short[] shortBuffer, int nSamples, int n) {
166
83
  boolean isSpeech = true;
167
84
  if (!isTranscribing && options.hasKey("useVad") && options.getBoolean("useVad")) {
@@ -295,7 +212,7 @@ public class WhisperContext {
295
212
  }
296
213
  // TODO: Append in real time so we don't need to keep all slices & also reduce memory usage
297
214
  Log.d(NAME, "Begin saving wav file to " + audioOutputPath);
298
- saveWavFile(concatShortBuffers(shortBufferSlices), audioOutputPath);
215
+ AudioUtils.saveWavFile(AudioUtils.concatShortBuffers(shortBufferSlices), audioOutputPath);
299
216
  if (!isTranscribing) {
300
217
  emitTranscribeEvent("@RNWhisper_onRealtimeTranscribeEnd", Arguments.createMap());
301
218
  }
@@ -346,7 +263,7 @@ public class WhisperContext {
346
263
  payload.putInt("sliceIndex", transcribeSliceIndex);
347
264
 
348
265
  if (code == 0) {
349
- payload.putMap("data", getTextSegments());
266
+ payload.putMap("data", getTextSegments(0, getTextSegmentCount(context)));
350
267
  } else {
351
268
  payload.putString("error", "Transcribe failed with code " + code);
352
269
  }
@@ -406,16 +323,41 @@ public class WhisperContext {
406
323
  eventEmitter.emit("@RNWhisper_onTranscribeProgress", event);
407
324
  }
408
325
 
409
- private static class ProgressCallback {
326
+ private void emitNewSegments(WritableMap result) {
327
+ WritableMap event = Arguments.createMap();
328
+ event.putInt("contextId", WhisperContext.this.id);
329
+ event.putInt("jobId", jobId);
330
+ event.putMap("result", result);
331
+ eventEmitter.emit("@RNWhisper_onTranscribeNewSegments", event);
332
+ }
333
+
334
+ private static class Callback {
410
335
  WhisperContext context;
336
+ boolean emitProgressNeeded = false;
337
+ boolean emitNewSegmentsNeeded = false;
338
+ int totalNNew = 0;
411
339
 
412
- public ProgressCallback(WhisperContext context) {
340
+ public Callback(WhisperContext context, boolean emitProgressNeeded, boolean emitNewSegmentsNeeded) {
413
341
  this.context = context;
342
+ this.emitProgressNeeded = emitProgressNeeded;
343
+ this.emitNewSegmentsNeeded = emitNewSegmentsNeeded;
414
344
  }
415
345
 
416
346
  void onProgress(int progress) {
347
+ if (!emitProgressNeeded) return;
417
348
  context.emitProgress(progress);
418
349
  }
350
+
351
+ void onNewSegments(int nNew) {
352
+ Log.d(NAME, "onNewSegments: " + nNew);
353
+ totalNNew += nNew;
354
+ if (!emitNewSegmentsNeeded) return;
355
+
356
+ WritableMap result = context.getTextSegments(totalNNew - nNew, totalNNew);
357
+ result.putInt("nNew", nNew);
358
+ result.putInt("totalNNew", totalNNew);
359
+ context.emitNewSegments(result);
360
+ }
419
361
  }
420
362
 
421
363
  public WritableMap transcribeInputStream(int jobId, InputStream inputStream, ReadableMap options) throws IOException, Exception {
@@ -426,19 +368,21 @@ public class WhisperContext {
426
368
 
427
369
  this.jobId = jobId;
428
370
  isTranscribing = true;
429
- float[] audioData = decodeWaveFile(inputStream);
371
+ float[] audioData = AudioUtils.decodeWaveFile(inputStream);
430
372
  int code = full(jobId, options, audioData, audioData.length);
431
373
  isTranscribing = false;
432
374
  this.jobId = -1;
433
375
  if (code != 0) {
434
376
  throw new Exception("Failed to transcribe the file. Code: " + code);
435
377
  }
436
- WritableMap result = getTextSegments();
378
+ WritableMap result = getTextSegments(0, getTextSegmentCount(context));
437
379
  result.putBoolean("isAborted", isStoppedByAction);
438
380
  return result;
439
381
  }
440
382
 
441
383
  private int full(int jobId, ReadableMap options, float[] audioData, int audioDataLen) {
384
+ boolean hasProgressCallback = options.hasKey("onProgress") && options.getBoolean("onProgress");
385
+ boolean hasNewSegmentsCallback = options.hasKey("onNewSegments") && options.getBoolean("onNewSegments");
442
386
  return fullTranscribe(
443
387
  jobId,
444
388
  context,
@@ -478,13 +422,12 @@ public class WhisperContext {
478
422
  options.hasKey("language") ? options.getString("language") : "auto",
479
423
  // jstring prompt
480
424
  options.hasKey("prompt") ? options.getString("prompt") : null,
481
- // ProgressCallback progressCallback
482
- options.hasKey("onProgress") && options.getBoolean("onProgress") ? new ProgressCallback(this) : null
425
+ // Callback callback
426
+ hasProgressCallback || hasNewSegmentsCallback ? new Callback(this, hasProgressCallback, hasNewSegmentsCallback) : null
483
427
  );
484
428
  }
485
429
 
486
- private WritableMap getTextSegments() {
487
- Integer count = getTextSegmentCount(context);
430
+ private WritableMap getTextSegments(int start, int count) {
488
431
  StringBuilder builder = new StringBuilder();
489
432
 
490
433
  WritableMap data = Arguments.createMap();
@@ -537,28 +480,6 @@ public class WhisperContext {
537
480
  freeContext(context);
538
481
  }
539
482
 
540
- public static float[] decodeWaveFile(InputStream inputStream) throws IOException {
541
- ByteArrayOutputStream baos = new ByteArrayOutputStream();
542
- byte[] buffer = new byte[1024];
543
- int bytesRead;
544
- while ((bytesRead = inputStream.read(buffer)) != -1) {
545
- baos.write(buffer, 0, bytesRead);
546
- }
547
- ByteBuffer byteBuffer = ByteBuffer.wrap(baos.toByteArray());
548
- byteBuffer.order(ByteOrder.LITTLE_ENDIAN);
549
- byteBuffer.position(44);
550
- ShortBuffer shortBuffer = byteBuffer.asShortBuffer();
551
- short[] shortArray = new short[shortBuffer.limit()];
552
- shortBuffer.get(shortArray);
553
- float[] floatArray = new float[shortArray.length];
554
- for (int i = 0; i < shortArray.length; i++) {
555
- floatArray[i] = ((float) shortArray[i]) / 32767.0f;
556
- floatArray[i] = Math.max(floatArray[i], -1f);
557
- floatArray[i] = Math.min(floatArray[i], 1f);
558
- }
559
- return floatArray;
560
- }
561
-
562
483
  static {
563
484
  Log.d(NAME, "Primary ABI: " + Build.SUPPORTED_ABIS[0]);
564
485
  boolean loadVfpv4 = false;
@@ -647,7 +568,7 @@ public class WhisperContext {
647
568
  boolean translate,
648
569
  String language,
649
570
  String prompt,
650
- ProgressCallback progressCallback
571
+ Callback Callback
651
572
  );
652
573
  protected static native void abortTranscribe(int jobId);
653
574
  protected static native void abortAllTranscribe();
@@ -206,9 +206,9 @@ Java_com_rnwhisper_WhisperContext_vadSimple(
206
206
  return is_speech;
207
207
  }
208
208
 
209
- struct progress_callback_context {
209
+ struct callback_context {
210
210
  JNIEnv *env;
211
- jobject progress_callback_instance;
211
+ jobject callback_instance;
212
212
  };
213
213
 
214
214
  JNIEXPORT jint JNICALL
@@ -234,7 +234,7 @@ Java_com_rnwhisper_WhisperContext_fullTranscribe(
234
234
  jboolean translate,
235
235
  jstring language,
236
236
  jstring prompt,
237
- jobject progress_callback_instance
237
+ jobject callback_instance
238
238
  ) {
239
239
  UNUSED(thiz);
240
240
  struct whisper_context *context = reinterpret_cast<struct whisper_context *>(context_ptr);
@@ -302,19 +302,30 @@ Java_com_rnwhisper_WhisperContext_fullTranscribe(
302
302
  };
303
303
  params.encoder_begin_callback_user_data = rn_whisper_assign_abort_map(job_id);
304
304
 
305
- if (progress_callback_instance != nullptr) {
305
+ if (callback_instance != nullptr) {
306
+ callback_context *cb_ctx = new callback_context;
307
+ cb_ctx->env = env;
308
+ cb_ctx->callback_instance = env->NewGlobalRef(callback_instance);
309
+
306
310
  params.progress_callback = [](struct whisper_context * /*ctx*/, struct whisper_state * /*state*/, int progress, void * user_data) {
307
- progress_callback_context *cb_ctx = (progress_callback_context *)user_data;
311
+ callback_context *cb_ctx = (callback_context *)user_data;
308
312
  JNIEnv *env = cb_ctx->env;
309
- jobject progress_callback_instance = cb_ctx->progress_callback_instance;
310
- jclass progress_callback_class = env->GetObjectClass(progress_callback_instance);
311
- jmethodID onProgress = env->GetMethodID(progress_callback_class, "onProgress", "(I)V");
312
- env->CallVoidMethod(progress_callback_instance, onProgress, progress);
313
+ jobject callback_instance = cb_ctx->callback_instance;
314
+ jclass callback_class = env->GetObjectClass(callback_instance);
315
+ jmethodID onProgress = env->GetMethodID(callback_class, "onProgress", "(I)V");
316
+ env->CallVoidMethod(callback_instance, onProgress, progress);
313
317
  };
314
- progress_callback_context *cb_ctx = new progress_callback_context;
315
- cb_ctx->env = env;
316
- cb_ctx->progress_callback_instance = env->NewGlobalRef(progress_callback_instance);
317
318
  params.progress_callback_user_data = cb_ctx;
319
+
320
+ params.new_segment_callback = [](struct whisper_context * /*ctx*/, struct whisper_state * /*state*/, int n_new, void * user_data) {
321
+ callback_context *cb_ctx = (callback_context *)user_data;
322
+ JNIEnv *env = cb_ctx->env;
323
+ jobject callback_instance = cb_ctx->callback_instance;
324
+ jclass callback_class = env->GetObjectClass(callback_instance);
325
+ jmethodID onNewSegments = env->GetMethodID(callback_class, "onNewSegments", "(I)V");
326
+ env->CallVoidMethod(callback_instance, onNewSegments, n_new);
327
+ };
328
+ params.new_segment_callback_user_data = cb_ctx;
318
329
  }
319
330
 
320
331
  LOGI("About to reset timings");
@@ -6,6 +6,7 @@ import com.facebook.react.bridge.Promise;
6
6
  import com.facebook.react.bridge.ReactApplicationContext;
7
7
  import com.facebook.react.bridge.ReactMethod;
8
8
  import com.facebook.react.bridge.ReadableMap;
9
+ import com.facebook.react.bridge.ReadableArray;
9
10
  import com.facebook.react.module.annotations.ReactModule;
10
11
 
11
12
  import java.util.HashMap;
@@ -65,4 +66,29 @@ public class RNWhisperModule extends NativeRNWhisperSpec {
65
66
  public void releaseAllContexts(Promise promise) {
66
67
  rnwhisper.releaseAllContexts(promise);
67
68
  }
69
+
70
+ /*
71
+ * iOS Specific methods, left here for make the turbo module happy:
72
+ */
73
+
74
+ @ReactMethod
75
+ public void getAudioSessionCurrentCategory(Promise promise) {
76
+ promise.resolve(null);
77
+ }
78
+ @ReactMethod
79
+ public void getAudioSessionCurrentMode(Promise promise) {
80
+ promise.resolve(null);
81
+ }
82
+ @ReactMethod
83
+ public void setAudioSessionCategory(String category, ReadableArray options, Promise promise) {
84
+ promise.resolve(null);
85
+ }
86
+ @ReactMethod
87
+ public void setAudioSessionMode(String mode, Promise promise) {
88
+ promise.resolve(null);
89
+ }
90
+ @ReactMethod
91
+ public void setAudioSessionActive(boolean active, Promise promise) {
92
+ promise.resolve(null);
93
+ }
68
94
  }
package/ios/RNWhisper.mm CHANGED
@@ -1,6 +1,8 @@
1
1
  #import "RNWhisper.h"
2
2
  #import "RNWhisperContext.h"
3
3
  #import "RNWhisperDownloader.h"
4
+ #import "RNWhisperAudioUtils.h"
5
+ #import "RNWhisperAudioSessionUtils.h"
4
6
  #include <stdlib.h>
5
7
  #include <string>
6
8
 
@@ -87,6 +89,7 @@ RCT_REMAP_METHOD(initContext,
87
89
  - (NSArray *)supportedEvents {
88
90
  return@[
89
91
  @"@RNWhisper_onTranscribeProgress",
92
+ @"@RNWhisper_onTranscribeNewSegments",
90
93
  @"@RNWhisper_onRealtimeTranscribe",
91
94
  @"@RNWhisper_onRealtimeTranscribeEnd",
92
95
  ];
@@ -121,7 +124,7 @@ RCT_REMAP_METHOD(transcribeFile,
121
124
  }
122
125
 
123
126
  int count = 0;
124
- float *waveFile = [self decodeWaveFile:path count:&count];
127
+ float *waveFile = [RNWhisperAudioUtils decodeWaveFile:path count:&count];
125
128
  if (waveFile == nil) {
126
129
  reject(@"whisper_error", @"Invalid file", nil);
127
130
  return;
@@ -144,6 +147,20 @@ RCT_REMAP_METHOD(transcribeFile,
144
147
  ];
145
148
  });
146
149
  }
150
+ onNewSegments: ^(NSDictionary *result) {
151
+ if (rn_whisper_transcribe_is_aborted(jobId)) {
152
+ return;
153
+ }
154
+ dispatch_async(dispatch_get_main_queue(), ^{
155
+ [self sendEventWithName:@"@RNWhisper_onTranscribeNewSegments"
156
+ body:@{
157
+ @"contextId": [NSNumber numberWithInt:contextId],
158
+ @"jobId": [NSNumber numberWithInt:jobId],
159
+ @"result": result
160
+ }
161
+ ];
162
+ });
163
+ }
147
164
  onEnd: ^(int code) {
148
165
  if (code != 0) {
149
166
  free(waveFile);
@@ -242,27 +259,6 @@ RCT_REMAP_METHOD(releaseAllContexts,
242
259
  resolve(nil);
243
260
  }
244
261
 
245
- - (float *)decodeWaveFile:(NSString*)filePath count:(int *)count {
246
- NSURL *url = [NSURL fileURLWithPath:filePath];
247
- NSData *fileData = [NSData dataWithContentsOfURL:url];
248
- if (fileData == nil) {
249
- return nil;
250
- }
251
- NSMutableData *waveData = [[NSMutableData alloc] init];
252
- [waveData appendData:[fileData subdataWithRange:NSMakeRange(44, [fileData length]-44)]];
253
- const short *shortArray = (const short *)[waveData bytes];
254
- int shortCount = (int) ([waveData length] / sizeof(short));
255
- float *floatArray = (float *) malloc(shortCount * sizeof(float));
256
- for (NSInteger i = 0; i < shortCount; i++) {
257
- float floatValue = ((float)shortArray[i]) / 32767.0;
258
- floatValue = MAX(floatValue, -1.0);
259
- floatValue = MIN(floatValue, 1.0);
260
- floatArray[i] = floatValue;
261
- }
262
- *count = shortCount;
263
- return floatArray;
264
- }
265
-
266
262
  - (void)invalidate {
267
263
  [super invalidate];
268
264
 
@@ -283,6 +279,69 @@ RCT_REMAP_METHOD(releaseAllContexts,
283
279
  [RNWhisperDownloader clearCache];
284
280
  }
285
281
 
282
+ // MARK: - AudioSessionUtils
283
+
284
+ RCT_EXPORT_METHOD(getAudioSessionCurrentCategory:(RCTPromiseResolveBlock)resolve
285
+ withRejecter:(RCTPromiseRejectBlock)reject)
286
+ {
287
+ NSString *category = [RNWhisperAudioSessionUtils getCurrentCategory];
288
+ NSArray *options = [RNWhisperAudioSessionUtils getCurrentOptions];
289
+ resolve(@{
290
+ @"category": category,
291
+ @"options": options
292
+ });
293
+ }
294
+
295
+ RCT_EXPORT_METHOD(getAudioSessionCurrentMode:(RCTPromiseResolveBlock)resolve
296
+ withRejecter:(RCTPromiseRejectBlock)reject)
297
+ {
298
+ NSString *mode = [RNWhisperAudioSessionUtils getCurrentMode];
299
+ resolve(mode);
300
+ }
301
+
302
+ RCT_REMAP_METHOD(setAudioSessionCategory,
303
+ withCategory:(NSString *)category
304
+ withOptions:(NSArray *)options
305
+ withResolver:(RCTPromiseResolveBlock)resolve
306
+ withRejecter:(RCTPromiseRejectBlock)reject)
307
+ {
308
+ NSError *error = nil;
309
+ [RNWhisperAudioSessionUtils setCategory:category options:options error:&error];
310
+ if (error != nil) {
311
+ reject(@"whisper_error", [NSString stringWithFormat:@"Failed to set category. Error: %@", error], nil);
312
+ return;
313
+ }
314
+ resolve(nil);
315
+ }
316
+
317
+ RCT_REMAP_METHOD(setAudioSessionMode,
318
+ withMode:(NSString *)mode
319
+ withResolver:(RCTPromiseResolveBlock)resolve
320
+ withRejecter:(RCTPromiseRejectBlock)reject)
321
+ {
322
+ NSError *error = nil;
323
+ [RNWhisperAudioSessionUtils setMode:mode error:&error];
324
+ if (error != nil) {
325
+ reject(@"whisper_error", [NSString stringWithFormat:@"Failed to set mode. Error: %@", error], nil);
326
+ return;
327
+ }
328
+ resolve(nil);
329
+ }
330
+
331
+ RCT_REMAP_METHOD(setAudioSessionActive,
332
+ withActive:(BOOL)active
333
+ withResolver:(RCTPromiseResolveBlock)resolve
334
+ withRejecter:(RCTPromiseRejectBlock)reject)
335
+ {
336
+ NSError *error = nil;
337
+ [RNWhisperAudioSessionUtils setActive:active error:&error];
338
+ if (error != nil) {
339
+ reject(@"whisper_error", [NSString stringWithFormat:@"Failed to set active. Error: %@", error], nil);
340
+ return;
341
+ }
342
+ resolve(nil);
343
+ }
344
+
286
345
  #ifdef RCT_NEW_ARCH_ENABLED
287
346
  - (std::shared_ptr<facebook::react::TurboModule>)getTurboModule:
288
347
  (const facebook::react::ObjCTurboModule::InitParams &)params
@@ -8,6 +8,7 @@
8
8
 
9
9
  /* Begin PBXBuildFile section */
10
10
  5E555C0D2413F4C50049A1A2 /* RNWhisper.mm in Sources */ = {isa = PBXBuildFile; fileRef = B3E7B5891CC2AC0600A0062D /* RNWhisper.mm */; };
11
+ 7F458E922AC7DC74007045F6 /* RNWhisperAudioSessionUtils.m in Sources */ = {isa = PBXBuildFile; fileRef = 7F458E912AC7DC74007045F6 /* RNWhisperAudioSessionUtils.m */; };
11
12
  7FE0BBA12ABE6C7B0049B4E4 /* RNWhisperDownloader.m in Sources */ = {isa = PBXBuildFile; fileRef = 7FE0BB9B2ABE6C7B0049B4E4 /* RNWhisperDownloader.m */; };
12
13
  7FE0BBA22ABE6C7B0049B4E4 /* RNWhisperAudioUtils.m in Sources */ = {isa = PBXBuildFile; fileRef = 7FE0BB9C2ABE6C7B0049B4E4 /* RNWhisperAudioUtils.m */; };
13
14
  7FE0BBA32ABE6C7B0049B4E4 /* RNWhisperContext.mm in Sources */ = {isa = PBXBuildFile; fileRef = 7FE0BBA02ABE6C7B0049B4E4 /* RNWhisperContext.mm */; };
@@ -27,6 +28,8 @@
27
28
 
28
29
  /* Begin PBXFileReference section */
29
30
  134814201AA4EA6300B7C361 /* libRNWhisper.a */ = {isa = PBXFileReference; explicitFileType = archive.ar; includeInIndex = 0; path = libRNWhisper.a; sourceTree = BUILT_PRODUCTS_DIR; };
31
+ 7F458E902AC7DC74007045F6 /* RNWhisperAudioSessionUtils.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = RNWhisperAudioSessionUtils.h; sourceTree = "<group>"; };
32
+ 7F458E912AC7DC74007045F6 /* RNWhisperAudioSessionUtils.m */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.objc; path = RNWhisperAudioSessionUtils.m; sourceTree = "<group>"; };
30
33
  7FE0BB9A2ABE6C7B0049B4E4 /* RNWhisper.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = RNWhisper.h; sourceTree = "<group>"; };
31
34
  7FE0BB9B2ABE6C7B0049B4E4 /* RNWhisperDownloader.m */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.objc; path = RNWhisperDownloader.m; sourceTree = "<group>"; };
32
35
  7FE0BB9C2ABE6C7B0049B4E4 /* RNWhisperAudioUtils.m */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.objc; path = RNWhisperAudioUtils.m; sourceTree = "<group>"; };
@@ -59,6 +62,8 @@
59
62
  58B511D21A9E6C8500147676 = {
60
63
  isa = PBXGroup;
61
64
  children = (
65
+ 7F458E902AC7DC74007045F6 /* RNWhisperAudioSessionUtils.h */,
66
+ 7F458E912AC7DC74007045F6 /* RNWhisperAudioSessionUtils.m */,
62
67
  7FE0BB9F2ABE6C7B0049B4E4 /* RNWhisperAudioUtils.h */,
63
68
  7FE0BB9C2ABE6C7B0049B4E4 /* RNWhisperAudioUtils.m */,
64
69
  7FE0BB9A2ABE6C7B0049B4E4 /* RNWhisper.h */,
@@ -132,6 +137,7 @@
132
137
  7FE0BBA22ABE6C7B0049B4E4 /* RNWhisperAudioUtils.m in Sources */,
133
138
  7FE0BBA32ABE6C7B0049B4E4 /* RNWhisperContext.mm in Sources */,
134
139
  7FE0BBA12ABE6C7B0049B4E4 /* RNWhisperDownloader.m in Sources */,
140
+ 7F458E922AC7DC74007045F6 /* RNWhisperAudioSessionUtils.m in Sources */,
135
141
  );
136
142
  runOnlyForDeploymentPostprocessing = 0;
137
143
  };