whisper.rn 0.3.7 → 0.3.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +28 -0
- package/android/src/main/java/com/rnwhisper/AudioUtils.java +119 -0
- package/android/src/main/java/com/rnwhisper/WhisperContext.java +37 -116
- package/android/src/main/jni.cpp +23 -12
- package/android/src/newarch/java/com/rnwhisper/RNWhisperModule.java +26 -0
- package/ios/RNWhisper.mm +81 -22
- package/ios/RNWhisper.xcodeproj/project.pbxproj +6 -0
- package/ios/RNWhisper.xcodeproj/project.xcworkspace/xcuserdata/jhen.xcuserdatad/UserInterfaceState.xcuserstate +0 -0
- package/ios/RNWhisperAudioSessionUtils.h +13 -0
- package/ios/RNWhisperAudioSessionUtils.m +91 -0
- package/ios/RNWhisperAudioUtils.h +1 -0
- package/ios/RNWhisperAudioUtils.m +21 -0
- package/ios/RNWhisperContext.h +1 -0
- package/ios/RNWhisperContext.mm +56 -28
- package/jest/mock.js +10 -0
- package/lib/commonjs/AudioSessionIos.js +91 -0
- package/lib/commonjs/AudioSessionIos.js.map +1 -0
- package/lib/commonjs/NativeRNWhisper.js.map +1 -1
- package/lib/commonjs/index.js +82 -14
- package/lib/commonjs/index.js.map +1 -1
- package/lib/module/AudioSessionIos.js +83 -0
- package/lib/module/AudioSessionIos.js.map +1 -0
- package/lib/module/NativeRNWhisper.js.map +1 -1
- package/lib/module/index.js +77 -14
- package/lib/module/index.js.map +1 -1
- package/lib/typescript/AudioSessionIos.d.ts +54 -0
- package/lib/typescript/AudioSessionIos.d.ts.map +1 -0
- package/lib/typescript/NativeRNWhisper.d.ts +8 -0
- package/lib/typescript/NativeRNWhisper.d.ts.map +1 -1
- package/lib/typescript/index.d.ts +37 -2
- package/lib/typescript/index.d.ts.map +1 -1
- package/package.json +1 -1
- package/src/AudioSessionIos.ts +90 -0
- package/src/NativeRNWhisper.ts +11 -1
- package/src/index.ts +153 -26
package/README.md
CHANGED
|
@@ -99,6 +99,34 @@ subscribe(evt => {
|
|
|
99
99
|
})
|
|
100
100
|
```
|
|
101
101
|
|
|
102
|
+
On iOS, you may need to change the Audio Session so that it can be used alongside other audio playback, or to optimize the quality of the recording. We provide AudioSession utilities for this:
|
|
103
|
+
|
|
104
|
+
Option 1 - Use options in transcribeRealtime:
|
|
105
|
+
```js
|
|
106
|
+
import { AudioSessionIos } from 'whisper.rn'
|
|
107
|
+
|
|
108
|
+
const { stop, subscribe } = await whisperContext.transcribeRealtime({
|
|
109
|
+
audioSessionOnStartIos: {
|
|
110
|
+
category: AudioSessionIos.Category.PlayAndRecord,
|
|
111
|
+
options: [AudioSessionIos.CategoryOption.MixWithOthers],
|
|
112
|
+
mode: AudioSessionIos.Mode.Default,
|
|
113
|
+
},
|
|
114
|
+
audioSessionOnStopIos: 'restore', // Or an AudioSessionSettingIos
|
|
115
|
+
})
|
|
116
|
+
```
|
|
117
|
+
|
|
118
|
+
Option 2 - Manage the Audio Session anywhere:
|
|
119
|
+
```js
|
|
120
|
+
import { AudioSessionIos } from 'whisper.rn'
|
|
121
|
+
|
|
122
|
+
await AudioSessionIos.setCategory(
|
|
123
|
+
AudioSessionIos.Category.PlayAndRecord, [AudioSessionIos.CategoryOption.MixWithOthers],
|
|
124
|
+
)
|
|
125
|
+
await AudioSessionIos.setMode(AudioSessionIos.Mode.Default)
|
|
126
|
+
await AudioSessionIos.setActive(true)
|
|
127
|
+
// Then you can start recording
|
|
128
|
+
```
|
|
129
|
+
|
|
102
130
|
On Android, you may need to request the microphone permission using [`PermissionsAndroid`](https://reactnative.dev/docs/permissionsandroid).
|
|
103
131
|
|
|
104
132
|
Please visit the [Documentation](docs/) for more details.
|
|
@@ -0,0 +1,119 @@
|
|
|
1
|
+
package com.rnwhisper;
|
|
2
|
+
|
|
3
|
+
import android.util.Log;
|
|
4
|
+
|
|
5
|
+
import java.util.ArrayList;
|
|
6
|
+
import java.lang.StringBuilder;
|
|
7
|
+
import java.io.IOException;
|
|
8
|
+
import java.io.FileReader;
|
|
9
|
+
import java.io.ByteArrayOutputStream;
|
|
10
|
+
import java.io.File;
|
|
11
|
+
import java.io.FileOutputStream;
|
|
12
|
+
import java.io.DataOutputStream;
|
|
13
|
+
import java.io.IOException;
|
|
14
|
+
import java.io.InputStream;
|
|
15
|
+
import java.nio.ByteBuffer;
|
|
16
|
+
import java.nio.ByteOrder;
|
|
17
|
+
import java.nio.ShortBuffer;
|
|
18
|
+
|
|
19
|
+
public class AudioUtils {
|
|
20
|
+
private static final String NAME = "RNWhisperAudioUtils";
|
|
21
|
+
|
|
22
|
+
private static final int SAMPLE_RATE = 16000;
|
|
23
|
+
|
|
24
|
+
private static byte[] shortToByte(short[] shortInts) {
|
|
25
|
+
int j = 0;
|
|
26
|
+
int length = shortInts.length;
|
|
27
|
+
byte[] byteData = new byte[length * 2];
|
|
28
|
+
for (int i = 0; i < length; i++) {
|
|
29
|
+
byteData[j++] = (byte) (shortInts[i] >>> 8);
|
|
30
|
+
byteData[j++] = (byte) (shortInts[i] >>> 0);
|
|
31
|
+
}
|
|
32
|
+
return byteData;
|
|
33
|
+
}
|
|
34
|
+
|
|
35
|
+
public static byte[] concatShortBuffers(ArrayList<short[]> buffers) {
|
|
36
|
+
int totalLength = 0;
|
|
37
|
+
for (int i = 0; i < buffers.size(); i++) {
|
|
38
|
+
totalLength += buffers.get(i).length;
|
|
39
|
+
}
|
|
40
|
+
byte[] result = new byte[totalLength * 2];
|
|
41
|
+
int offset = 0;
|
|
42
|
+
for (int i = 0; i < buffers.size(); i++) {
|
|
43
|
+
byte[] bytes = shortToByte(buffers.get(i));
|
|
44
|
+
System.arraycopy(bytes, 0, result, offset, bytes.length);
|
|
45
|
+
offset += bytes.length;
|
|
46
|
+
}
|
|
47
|
+
|
|
48
|
+
return result;
|
|
49
|
+
}
|
|
50
|
+
|
|
51
|
+
private static byte[] removeTrailingZeros(byte[] audioData) {
|
|
52
|
+
int i = audioData.length - 1;
|
|
53
|
+
while (i >= 0 && audioData[i] == 0) {
|
|
54
|
+
--i;
|
|
55
|
+
}
|
|
56
|
+
byte[] newData = new byte[i + 1];
|
|
57
|
+
System.arraycopy(audioData, 0, newData, 0, i + 1);
|
|
58
|
+
return newData;
|
|
59
|
+
}
|
|
60
|
+
|
|
61
|
+
public static void saveWavFile(byte[] rawData, String audioOutputFile) throws IOException {
|
|
62
|
+
Log.d(NAME, "call saveWavFile");
|
|
63
|
+
rawData = removeTrailingZeros(rawData);
|
|
64
|
+
DataOutputStream output = null;
|
|
65
|
+
try {
|
|
66
|
+
output = new DataOutputStream(new FileOutputStream(audioOutputFile));
|
|
67
|
+
// WAVE header
|
|
68
|
+
// see http://ccrma.stanford.edu/courses/422/projects/WaveFormat/
|
|
69
|
+
output.writeBytes("RIFF"); // chunk id
|
|
70
|
+
output.writeInt(Integer.reverseBytes(36 + rawData.length)); // chunk size
|
|
71
|
+
output.writeBytes("WAVE"); // format
|
|
72
|
+
output.writeBytes("fmt "); // subchunk 1 id
|
|
73
|
+
output.writeInt(Integer.reverseBytes(16)); // subchunk 1 size
|
|
74
|
+
output.writeShort(Short.reverseBytes((short) 1)); // audio format (1 = PCM)
|
|
75
|
+
output.writeShort(Short.reverseBytes((short) 1)); // number of channels
|
|
76
|
+
output.writeInt(Integer.reverseBytes(SAMPLE_RATE)); // sample rate
|
|
77
|
+
output.writeInt(Integer.reverseBytes(SAMPLE_RATE * 2)); // byte rate
|
|
78
|
+
output.writeShort(Short.reverseBytes((short) 2)); // block align
|
|
79
|
+
output.writeShort(Short.reverseBytes((short) 16)); // bits per sample
|
|
80
|
+
output.writeBytes("data"); // subchunk 2 id
|
|
81
|
+
output.writeInt(Integer.reverseBytes(rawData.length)); // subchunk 2 size
|
|
82
|
+
// Audio data (conversion big endian -> little endian)
|
|
83
|
+
short[] shorts = new short[rawData.length / 2];
|
|
84
|
+
ByteBuffer.wrap(rawData).order(ByteOrder.LITTLE_ENDIAN).asShortBuffer().get(shorts);
|
|
85
|
+
ByteBuffer bytes = ByteBuffer.allocate(shorts.length * 2);
|
|
86
|
+
for (short s : shorts) {
|
|
87
|
+
bytes.putShort(s);
|
|
88
|
+
}
|
|
89
|
+
Log.d(NAME, "writing audio file: " + audioOutputFile);
|
|
90
|
+
output.write(bytes.array());
|
|
91
|
+
} finally {
|
|
92
|
+
if (output != null) {
|
|
93
|
+
output.close();
|
|
94
|
+
}
|
|
95
|
+
}
|
|
96
|
+
}
|
|
97
|
+
|
|
98
|
+
public static float[] decodeWaveFile(InputStream inputStream) throws IOException {
|
|
99
|
+
ByteArrayOutputStream baos = new ByteArrayOutputStream();
|
|
100
|
+
byte[] buffer = new byte[1024];
|
|
101
|
+
int bytesRead;
|
|
102
|
+
while ((bytesRead = inputStream.read(buffer)) != -1) {
|
|
103
|
+
baos.write(buffer, 0, bytesRead);
|
|
104
|
+
}
|
|
105
|
+
ByteBuffer byteBuffer = ByteBuffer.wrap(baos.toByteArray());
|
|
106
|
+
byteBuffer.order(ByteOrder.LITTLE_ENDIAN);
|
|
107
|
+
byteBuffer.position(44);
|
|
108
|
+
ShortBuffer shortBuffer = byteBuffer.asShortBuffer();
|
|
109
|
+
short[] shortArray = new short[shortBuffer.limit()];
|
|
110
|
+
shortBuffer.get(shortArray);
|
|
111
|
+
float[] floatArray = new float[shortArray.length];
|
|
112
|
+
for (int i = 0; i < shortArray.length; i++) {
|
|
113
|
+
floatArray[i] = ((float) shortArray[i]) / 32767.0f;
|
|
114
|
+
floatArray[i] = Math.max(floatArray[i], -1f);
|
|
115
|
+
floatArray[i] = Math.min(floatArray[i], 1f);
|
|
116
|
+
}
|
|
117
|
+
return floatArray;
|
|
118
|
+
}
|
|
119
|
+
}
|
|
@@ -14,24 +14,15 @@ import android.media.AudioFormat;
|
|
|
14
14
|
import android.media.AudioRecord;
|
|
15
15
|
import android.media.MediaRecorder.AudioSource;
|
|
16
16
|
|
|
17
|
-
import java.util.Random;
|
|
18
17
|
import java.util.ArrayList;
|
|
19
18
|
import java.lang.StringBuilder;
|
|
20
|
-
import java.io.File;
|
|
21
19
|
import java.io.BufferedReader;
|
|
22
20
|
import java.io.IOException;
|
|
23
21
|
import java.io.FileReader;
|
|
24
|
-
import java.io.ByteArrayOutputStream;
|
|
25
22
|
import java.io.File;
|
|
26
|
-
import java.io.FileInputStream;
|
|
27
|
-
import java.io.FileOutputStream;
|
|
28
|
-
import java.io.DataOutputStream;
|
|
29
23
|
import java.io.IOException;
|
|
30
24
|
import java.io.InputStream;
|
|
31
25
|
import java.io.PushbackInputStream;
|
|
32
|
-
import java.nio.ByteBuffer;
|
|
33
|
-
import java.nio.ByteOrder;
|
|
34
|
-
import java.nio.ShortBuffer;
|
|
35
26
|
|
|
36
27
|
public class WhisperContext {
|
|
37
28
|
public static final String NAME = "RNWhisperContext";
|
|
@@ -88,80 +79,6 @@ public class WhisperContext {
|
|
|
88
79
|
fullHandler = null;
|
|
89
80
|
}
|
|
90
81
|
|
|
91
|
-
public byte[] shortToByte(short[] shortInts) {
|
|
92
|
-
int j = 0;
|
|
93
|
-
int length = shortInts.length;
|
|
94
|
-
byte[] byteData = new byte[length * 2];
|
|
95
|
-
for (int i = 0; i < length; i++) {
|
|
96
|
-
byteData[j++] = (byte) (shortInts[i] >>> 8);
|
|
97
|
-
byteData[j++] = (byte) (shortInts[i] >>> 0);
|
|
98
|
-
}
|
|
99
|
-
return byteData;
|
|
100
|
-
}
|
|
101
|
-
|
|
102
|
-
public byte[] concatShortBuffers(ArrayList<short[]> buffers) {
|
|
103
|
-
int totalLength = 0;
|
|
104
|
-
for (int i = 0; i < buffers.size(); i++) {
|
|
105
|
-
totalLength += buffers.get(i).length;
|
|
106
|
-
}
|
|
107
|
-
byte[] result = new byte[totalLength * 2];
|
|
108
|
-
int offset = 0;
|
|
109
|
-
for (int i = 0; i < buffers.size(); i++) {
|
|
110
|
-
byte[] bytes = shortToByte(buffers.get(i));
|
|
111
|
-
System.arraycopy(bytes, 0, result, offset, bytes.length);
|
|
112
|
-
offset += bytes.length;
|
|
113
|
-
}
|
|
114
|
-
|
|
115
|
-
return result;
|
|
116
|
-
}
|
|
117
|
-
|
|
118
|
-
public byte[] removeTrailingZeros(byte[] audioData) {
|
|
119
|
-
int i = audioData.length - 1;
|
|
120
|
-
while (i >= 0 && audioData[i] == 0) {
|
|
121
|
-
--i;
|
|
122
|
-
}
|
|
123
|
-
byte[] newData = new byte[i + 1];
|
|
124
|
-
System.arraycopy(audioData, 0, newData, 0, i + 1);
|
|
125
|
-
return newData;
|
|
126
|
-
}
|
|
127
|
-
|
|
128
|
-
private void saveWavFile(byte[] rawData, String audioOutputFile) throws IOException {
|
|
129
|
-
Log.d(NAME, "call saveWavFile");
|
|
130
|
-
rawData = removeTrailingZeros(rawData);
|
|
131
|
-
DataOutputStream output = null;
|
|
132
|
-
try {
|
|
133
|
-
output = new DataOutputStream(new FileOutputStream(audioOutputFile));
|
|
134
|
-
// WAVE header
|
|
135
|
-
// see http://ccrma.stanford.edu/courses/422/projects/WaveFormat/
|
|
136
|
-
output.writeBytes("RIFF"); // chunk id
|
|
137
|
-
output.writeInt(Integer.reverseBytes(36 + rawData.length)); // chunk size
|
|
138
|
-
output.writeBytes("WAVE"); // format
|
|
139
|
-
output.writeBytes("fmt "); // subchunk 1 id
|
|
140
|
-
output.writeInt(Integer.reverseBytes(16)); // subchunk 1 size
|
|
141
|
-
output.writeShort(Short.reverseBytes((short) 1)); // audio format (1 = PCM)
|
|
142
|
-
output.writeShort(Short.reverseBytes((short) 1)); // number of channels
|
|
143
|
-
output.writeInt(Integer.reverseBytes(SAMPLE_RATE)); // sample rate
|
|
144
|
-
output.writeInt(Integer.reverseBytes(SAMPLE_RATE * 2)); // byte rate
|
|
145
|
-
output.writeShort(Short.reverseBytes((short) 2)); // block align
|
|
146
|
-
output.writeShort(Short.reverseBytes((short) 16)); // bits per sample
|
|
147
|
-
output.writeBytes("data"); // subchunk 2 id
|
|
148
|
-
output.writeInt(Integer.reverseBytes(rawData.length)); // subchunk 2 size
|
|
149
|
-
// Audio data (conversion big endian -> little endian)
|
|
150
|
-
short[] shorts = new short[rawData.length / 2];
|
|
151
|
-
ByteBuffer.wrap(rawData).order(ByteOrder.LITTLE_ENDIAN).asShortBuffer().get(shorts);
|
|
152
|
-
ByteBuffer bytes = ByteBuffer.allocate(shorts.length * 2);
|
|
153
|
-
for (short s : shorts) {
|
|
154
|
-
bytes.putShort(s);
|
|
155
|
-
}
|
|
156
|
-
Log.d(NAME, "writing audio file: " + audioOutputFile);
|
|
157
|
-
output.write(bytes.array());
|
|
158
|
-
} finally {
|
|
159
|
-
if (output != null) {
|
|
160
|
-
output.close();
|
|
161
|
-
}
|
|
162
|
-
}
|
|
163
|
-
}
|
|
164
|
-
|
|
165
82
|
private boolean vad(ReadableMap options, short[] shortBuffer, int nSamples, int n) {
|
|
166
83
|
boolean isSpeech = true;
|
|
167
84
|
if (!isTranscribing && options.hasKey("useVad") && options.getBoolean("useVad")) {
|
|
@@ -295,7 +212,7 @@ public class WhisperContext {
|
|
|
295
212
|
}
|
|
296
213
|
// TODO: Append in real time so we don't need to keep all slices & also reduce memory usage
|
|
297
214
|
Log.d(NAME, "Begin saving wav file to " + audioOutputPath);
|
|
298
|
-
saveWavFile(concatShortBuffers(shortBufferSlices), audioOutputPath);
|
|
215
|
+
AudioUtils.saveWavFile(AudioUtils.concatShortBuffers(shortBufferSlices), audioOutputPath);
|
|
299
216
|
if (!isTranscribing) {
|
|
300
217
|
emitTranscribeEvent("@RNWhisper_onRealtimeTranscribeEnd", Arguments.createMap());
|
|
301
218
|
}
|
|
@@ -346,7 +263,7 @@ public class WhisperContext {
|
|
|
346
263
|
payload.putInt("sliceIndex", transcribeSliceIndex);
|
|
347
264
|
|
|
348
265
|
if (code == 0) {
|
|
349
|
-
payload.putMap("data", getTextSegments());
|
|
266
|
+
payload.putMap("data", getTextSegments(0, getTextSegmentCount(context)));
|
|
350
267
|
} else {
|
|
351
268
|
payload.putString("error", "Transcribe failed with code " + code);
|
|
352
269
|
}
|
|
@@ -406,16 +323,41 @@ public class WhisperContext {
|
|
|
406
323
|
eventEmitter.emit("@RNWhisper_onTranscribeProgress", event);
|
|
407
324
|
}
|
|
408
325
|
|
|
409
|
-
private
|
|
326
|
+
private void emitNewSegments(WritableMap result) {
|
|
327
|
+
WritableMap event = Arguments.createMap();
|
|
328
|
+
event.putInt("contextId", WhisperContext.this.id);
|
|
329
|
+
event.putInt("jobId", jobId);
|
|
330
|
+
event.putMap("result", result);
|
|
331
|
+
eventEmitter.emit("@RNWhisper_onTranscribeNewSegments", event);
|
|
332
|
+
}
|
|
333
|
+
|
|
334
|
+
private static class Callback {
|
|
410
335
|
WhisperContext context;
|
|
336
|
+
boolean emitProgressNeeded = false;
|
|
337
|
+
boolean emitNewSegmentsNeeded = false;
|
|
338
|
+
int totalNNew = 0;
|
|
411
339
|
|
|
412
|
-
public
|
|
340
|
+
public Callback(WhisperContext context, boolean emitProgressNeeded, boolean emitNewSegmentsNeeded) {
|
|
413
341
|
this.context = context;
|
|
342
|
+
this.emitProgressNeeded = emitProgressNeeded;
|
|
343
|
+
this.emitNewSegmentsNeeded = emitNewSegmentsNeeded;
|
|
414
344
|
}
|
|
415
345
|
|
|
416
346
|
void onProgress(int progress) {
|
|
347
|
+
if (!emitProgressNeeded) return;
|
|
417
348
|
context.emitProgress(progress);
|
|
418
349
|
}
|
|
350
|
+
|
|
351
|
+
void onNewSegments(int nNew) {
|
|
352
|
+
Log.d(NAME, "onNewSegments: " + nNew);
|
|
353
|
+
totalNNew += nNew;
|
|
354
|
+
if (!emitNewSegmentsNeeded) return;
|
|
355
|
+
|
|
356
|
+
WritableMap result = context.getTextSegments(totalNNew - nNew, totalNNew);
|
|
357
|
+
result.putInt("nNew", nNew);
|
|
358
|
+
result.putInt("totalNNew", totalNNew);
|
|
359
|
+
context.emitNewSegments(result);
|
|
360
|
+
}
|
|
419
361
|
}
|
|
420
362
|
|
|
421
363
|
public WritableMap transcribeInputStream(int jobId, InputStream inputStream, ReadableMap options) throws IOException, Exception {
|
|
@@ -426,19 +368,21 @@ public class WhisperContext {
|
|
|
426
368
|
|
|
427
369
|
this.jobId = jobId;
|
|
428
370
|
isTranscribing = true;
|
|
429
|
-
float[] audioData = decodeWaveFile(inputStream);
|
|
371
|
+
float[] audioData = AudioUtils.decodeWaveFile(inputStream);
|
|
430
372
|
int code = full(jobId, options, audioData, audioData.length);
|
|
431
373
|
isTranscribing = false;
|
|
432
374
|
this.jobId = -1;
|
|
433
375
|
if (code != 0) {
|
|
434
376
|
throw new Exception("Failed to transcribe the file. Code: " + code);
|
|
435
377
|
}
|
|
436
|
-
WritableMap result = getTextSegments();
|
|
378
|
+
WritableMap result = getTextSegments(0, getTextSegmentCount(context));
|
|
437
379
|
result.putBoolean("isAborted", isStoppedByAction);
|
|
438
380
|
return result;
|
|
439
381
|
}
|
|
440
382
|
|
|
441
383
|
private int full(int jobId, ReadableMap options, float[] audioData, int audioDataLen) {
|
|
384
|
+
boolean hasProgressCallback = options.hasKey("onProgress") && options.getBoolean("onProgress");
|
|
385
|
+
boolean hasNewSegmentsCallback = options.hasKey("onNewSegments") && options.getBoolean("onNewSegments");
|
|
442
386
|
return fullTranscribe(
|
|
443
387
|
jobId,
|
|
444
388
|
context,
|
|
@@ -478,13 +422,12 @@ public class WhisperContext {
|
|
|
478
422
|
options.hasKey("language") ? options.getString("language") : "auto",
|
|
479
423
|
// jstring prompt
|
|
480
424
|
options.hasKey("prompt") ? options.getString("prompt") : null,
|
|
481
|
-
//
|
|
482
|
-
|
|
425
|
+
// Callback callback
|
|
426
|
+
hasProgressCallback || hasNewSegmentsCallback ? new Callback(this, hasProgressCallback, hasNewSegmentsCallback) : null
|
|
483
427
|
);
|
|
484
428
|
}
|
|
485
429
|
|
|
486
|
-
private WritableMap getTextSegments() {
|
|
487
|
-
Integer count = getTextSegmentCount(context);
|
|
430
|
+
private WritableMap getTextSegments(int start, int count) {
|
|
488
431
|
StringBuilder builder = new StringBuilder();
|
|
489
432
|
|
|
490
433
|
WritableMap data = Arguments.createMap();
|
|
@@ -537,28 +480,6 @@ public class WhisperContext {
|
|
|
537
480
|
freeContext(context);
|
|
538
481
|
}
|
|
539
482
|
|
|
540
|
-
public static float[] decodeWaveFile(InputStream inputStream) throws IOException {
|
|
541
|
-
ByteArrayOutputStream baos = new ByteArrayOutputStream();
|
|
542
|
-
byte[] buffer = new byte[1024];
|
|
543
|
-
int bytesRead;
|
|
544
|
-
while ((bytesRead = inputStream.read(buffer)) != -1) {
|
|
545
|
-
baos.write(buffer, 0, bytesRead);
|
|
546
|
-
}
|
|
547
|
-
ByteBuffer byteBuffer = ByteBuffer.wrap(baos.toByteArray());
|
|
548
|
-
byteBuffer.order(ByteOrder.LITTLE_ENDIAN);
|
|
549
|
-
byteBuffer.position(44);
|
|
550
|
-
ShortBuffer shortBuffer = byteBuffer.asShortBuffer();
|
|
551
|
-
short[] shortArray = new short[shortBuffer.limit()];
|
|
552
|
-
shortBuffer.get(shortArray);
|
|
553
|
-
float[] floatArray = new float[shortArray.length];
|
|
554
|
-
for (int i = 0; i < shortArray.length; i++) {
|
|
555
|
-
floatArray[i] = ((float) shortArray[i]) / 32767.0f;
|
|
556
|
-
floatArray[i] = Math.max(floatArray[i], -1f);
|
|
557
|
-
floatArray[i] = Math.min(floatArray[i], 1f);
|
|
558
|
-
}
|
|
559
|
-
return floatArray;
|
|
560
|
-
}
|
|
561
|
-
|
|
562
483
|
static {
|
|
563
484
|
Log.d(NAME, "Primary ABI: " + Build.SUPPORTED_ABIS[0]);
|
|
564
485
|
boolean loadVfpv4 = false;
|
|
@@ -647,7 +568,7 @@ public class WhisperContext {
|
|
|
647
568
|
boolean translate,
|
|
648
569
|
String language,
|
|
649
570
|
String prompt,
|
|
650
|
-
|
|
571
|
+
Callback Callback
|
|
651
572
|
);
|
|
652
573
|
protected static native void abortTranscribe(int jobId);
|
|
653
574
|
protected static native void abortAllTranscribe();
|
package/android/src/main/jni.cpp
CHANGED
|
@@ -206,9 +206,9 @@ Java_com_rnwhisper_WhisperContext_vadSimple(
|
|
|
206
206
|
return is_speech;
|
|
207
207
|
}
|
|
208
208
|
|
|
209
|
-
struct
|
|
209
|
+
struct callback_context {
|
|
210
210
|
JNIEnv *env;
|
|
211
|
-
jobject
|
|
211
|
+
jobject callback_instance;
|
|
212
212
|
};
|
|
213
213
|
|
|
214
214
|
JNIEXPORT jint JNICALL
|
|
@@ -234,7 +234,7 @@ Java_com_rnwhisper_WhisperContext_fullTranscribe(
|
|
|
234
234
|
jboolean translate,
|
|
235
235
|
jstring language,
|
|
236
236
|
jstring prompt,
|
|
237
|
-
jobject
|
|
237
|
+
jobject callback_instance
|
|
238
238
|
) {
|
|
239
239
|
UNUSED(thiz);
|
|
240
240
|
struct whisper_context *context = reinterpret_cast<struct whisper_context *>(context_ptr);
|
|
@@ -302,19 +302,30 @@ Java_com_rnwhisper_WhisperContext_fullTranscribe(
|
|
|
302
302
|
};
|
|
303
303
|
params.encoder_begin_callback_user_data = rn_whisper_assign_abort_map(job_id);
|
|
304
304
|
|
|
305
|
-
if (
|
|
305
|
+
if (callback_instance != nullptr) {
|
|
306
|
+
callback_context *cb_ctx = new callback_context;
|
|
307
|
+
cb_ctx->env = env;
|
|
308
|
+
cb_ctx->callback_instance = env->NewGlobalRef(callback_instance);
|
|
309
|
+
|
|
306
310
|
params.progress_callback = [](struct whisper_context * /*ctx*/, struct whisper_state * /*state*/, int progress, void * user_data) {
|
|
307
|
-
|
|
311
|
+
callback_context *cb_ctx = (callback_context *)user_data;
|
|
308
312
|
JNIEnv *env = cb_ctx->env;
|
|
309
|
-
jobject
|
|
310
|
-
jclass
|
|
311
|
-
jmethodID onProgress = env->GetMethodID(
|
|
312
|
-
env->CallVoidMethod(
|
|
313
|
+
jobject callback_instance = cb_ctx->callback_instance;
|
|
314
|
+
jclass callback_class = env->GetObjectClass(callback_instance);
|
|
315
|
+
jmethodID onProgress = env->GetMethodID(callback_class, "onProgress", "(I)V");
|
|
316
|
+
env->CallVoidMethod(callback_instance, onProgress, progress);
|
|
313
317
|
};
|
|
314
|
-
progress_callback_context *cb_ctx = new progress_callback_context;
|
|
315
|
-
cb_ctx->env = env;
|
|
316
|
-
cb_ctx->progress_callback_instance = env->NewGlobalRef(progress_callback_instance);
|
|
317
318
|
params.progress_callback_user_data = cb_ctx;
|
|
319
|
+
|
|
320
|
+
params.new_segment_callback = [](struct whisper_context * /*ctx*/, struct whisper_state * /*state*/, int n_new, void * user_data) {
|
|
321
|
+
callback_context *cb_ctx = (callback_context *)user_data;
|
|
322
|
+
JNIEnv *env = cb_ctx->env;
|
|
323
|
+
jobject callback_instance = cb_ctx->callback_instance;
|
|
324
|
+
jclass callback_class = env->GetObjectClass(callback_instance);
|
|
325
|
+
jmethodID onNewSegments = env->GetMethodID(callback_class, "onNewSegments", "(I)V");
|
|
326
|
+
env->CallVoidMethod(callback_instance, onNewSegments, n_new);
|
|
327
|
+
};
|
|
328
|
+
params.new_segment_callback_user_data = cb_ctx;
|
|
318
329
|
}
|
|
319
330
|
|
|
320
331
|
LOGI("About to reset timings");
|
|
@@ -6,6 +6,7 @@ import com.facebook.react.bridge.Promise;
|
|
|
6
6
|
import com.facebook.react.bridge.ReactApplicationContext;
|
|
7
7
|
import com.facebook.react.bridge.ReactMethod;
|
|
8
8
|
import com.facebook.react.bridge.ReadableMap;
|
|
9
|
+
import com.facebook.react.bridge.ReadableArray;
|
|
9
10
|
import com.facebook.react.module.annotations.ReactModule;
|
|
10
11
|
|
|
11
12
|
import java.util.HashMap;
|
|
@@ -65,4 +66,29 @@ public class RNWhisperModule extends NativeRNWhisperSpec {
|
|
|
65
66
|
public void releaseAllContexts(Promise promise) {
|
|
66
67
|
rnwhisper.releaseAllContexts(promise);
|
|
67
68
|
}
|
|
69
|
+
|
|
70
|
+
/*
|
|
71
|
+
* iOS-specific methods, left here to keep the turbo module happy:
|
|
72
|
+
*/
|
|
73
|
+
|
|
74
|
+
@ReactMethod
|
|
75
|
+
public void getAudioSessionCurrentCategory(Promise promise) {
|
|
76
|
+
promise.resolve(null);
|
|
77
|
+
}
|
|
78
|
+
@ReactMethod
|
|
79
|
+
public void getAudioSessionCurrentMode(Promise promise) {
|
|
80
|
+
promise.resolve(null);
|
|
81
|
+
}
|
|
82
|
+
@ReactMethod
|
|
83
|
+
public void setAudioSessionCategory(String category, ReadableArray options, Promise promise) {
|
|
84
|
+
promise.resolve(null);
|
|
85
|
+
}
|
|
86
|
+
@ReactMethod
|
|
87
|
+
public void setAudioSessionMode(String mode, Promise promise) {
|
|
88
|
+
promise.resolve(null);
|
|
89
|
+
}
|
|
90
|
+
@ReactMethod
|
|
91
|
+
public void setAudioSessionActive(boolean active, Promise promise) {
|
|
92
|
+
promise.resolve(null);
|
|
93
|
+
}
|
|
68
94
|
}
|
package/ios/RNWhisper.mm
CHANGED
|
@@ -1,6 +1,8 @@
|
|
|
1
1
|
#import "RNWhisper.h"
|
|
2
2
|
#import "RNWhisperContext.h"
|
|
3
3
|
#import "RNWhisperDownloader.h"
|
|
4
|
+
#import "RNWhisperAudioUtils.h"
|
|
5
|
+
#import "RNWhisperAudioSessionUtils.h"
|
|
4
6
|
#include <stdlib.h>
|
|
5
7
|
#include <string>
|
|
6
8
|
|
|
@@ -87,6 +89,7 @@ RCT_REMAP_METHOD(initContext,
|
|
|
87
89
|
- (NSArray *)supportedEvents {
|
|
88
90
|
return@[
|
|
89
91
|
@"@RNWhisper_onTranscribeProgress",
|
|
92
|
+
@"@RNWhisper_onTranscribeNewSegments",
|
|
90
93
|
@"@RNWhisper_onRealtimeTranscribe",
|
|
91
94
|
@"@RNWhisper_onRealtimeTranscribeEnd",
|
|
92
95
|
];
|
|
@@ -121,7 +124,7 @@ RCT_REMAP_METHOD(transcribeFile,
|
|
|
121
124
|
}
|
|
122
125
|
|
|
123
126
|
int count = 0;
|
|
124
|
-
float *waveFile = [
|
|
127
|
+
float *waveFile = [RNWhisperAudioUtils decodeWaveFile:path count:&count];
|
|
125
128
|
if (waveFile == nil) {
|
|
126
129
|
reject(@"whisper_error", @"Invalid file", nil);
|
|
127
130
|
return;
|
|
@@ -144,6 +147,20 @@ RCT_REMAP_METHOD(transcribeFile,
|
|
|
144
147
|
];
|
|
145
148
|
});
|
|
146
149
|
}
|
|
150
|
+
onNewSegments: ^(NSDictionary *result) {
|
|
151
|
+
if (rn_whisper_transcribe_is_aborted(jobId)) {
|
|
152
|
+
return;
|
|
153
|
+
}
|
|
154
|
+
dispatch_async(dispatch_get_main_queue(), ^{
|
|
155
|
+
[self sendEventWithName:@"@RNWhisper_onTranscribeNewSegments"
|
|
156
|
+
body:@{
|
|
157
|
+
@"contextId": [NSNumber numberWithInt:contextId],
|
|
158
|
+
@"jobId": [NSNumber numberWithInt:jobId],
|
|
159
|
+
@"result": result
|
|
160
|
+
}
|
|
161
|
+
];
|
|
162
|
+
});
|
|
163
|
+
}
|
|
147
164
|
onEnd: ^(int code) {
|
|
148
165
|
if (code != 0) {
|
|
149
166
|
free(waveFile);
|
|
@@ -242,27 +259,6 @@ RCT_REMAP_METHOD(releaseAllContexts,
|
|
|
242
259
|
resolve(nil);
|
|
243
260
|
}
|
|
244
261
|
|
|
245
|
-
- (float *)decodeWaveFile:(NSString*)filePath count:(int *)count {
|
|
246
|
-
NSURL *url = [NSURL fileURLWithPath:filePath];
|
|
247
|
-
NSData *fileData = [NSData dataWithContentsOfURL:url];
|
|
248
|
-
if (fileData == nil) {
|
|
249
|
-
return nil;
|
|
250
|
-
}
|
|
251
|
-
NSMutableData *waveData = [[NSMutableData alloc] init];
|
|
252
|
-
[waveData appendData:[fileData subdataWithRange:NSMakeRange(44, [fileData length]-44)]];
|
|
253
|
-
const short *shortArray = (const short *)[waveData bytes];
|
|
254
|
-
int shortCount = (int) ([waveData length] / sizeof(short));
|
|
255
|
-
float *floatArray = (float *) malloc(shortCount * sizeof(float));
|
|
256
|
-
for (NSInteger i = 0; i < shortCount; i++) {
|
|
257
|
-
float floatValue = ((float)shortArray[i]) / 32767.0;
|
|
258
|
-
floatValue = MAX(floatValue, -1.0);
|
|
259
|
-
floatValue = MIN(floatValue, 1.0);
|
|
260
|
-
floatArray[i] = floatValue;
|
|
261
|
-
}
|
|
262
|
-
*count = shortCount;
|
|
263
|
-
return floatArray;
|
|
264
|
-
}
|
|
265
|
-
|
|
266
262
|
- (void)invalidate {
|
|
267
263
|
[super invalidate];
|
|
268
264
|
|
|
@@ -283,6 +279,69 @@ RCT_REMAP_METHOD(releaseAllContexts,
|
|
|
283
279
|
[RNWhisperDownloader clearCache];
|
|
284
280
|
}
|
|
285
281
|
|
|
282
|
+
// MARK: - AudioSessionUtils
|
|
283
|
+
|
|
284
|
+
RCT_EXPORT_METHOD(getAudioSessionCurrentCategory:(RCTPromiseResolveBlock)resolve
|
|
285
|
+
withRejecter:(RCTPromiseRejectBlock)reject)
|
|
286
|
+
{
|
|
287
|
+
NSString *category = [RNWhisperAudioSessionUtils getCurrentCategory];
|
|
288
|
+
NSArray *options = [RNWhisperAudioSessionUtils getCurrentOptions];
|
|
289
|
+
resolve(@{
|
|
290
|
+
@"category": category,
|
|
291
|
+
@"options": options
|
|
292
|
+
});
|
|
293
|
+
}
|
|
294
|
+
|
|
295
|
+
RCT_EXPORT_METHOD(getAudioSessionCurrentMode:(RCTPromiseResolveBlock)resolve
|
|
296
|
+
withRejecter:(RCTPromiseRejectBlock)reject)
|
|
297
|
+
{
|
|
298
|
+
NSString *mode = [RNWhisperAudioSessionUtils getCurrentMode];
|
|
299
|
+
resolve(mode);
|
|
300
|
+
}
|
|
301
|
+
|
|
302
|
+
RCT_REMAP_METHOD(setAudioSessionCategory,
|
|
303
|
+
withCategory:(NSString *)category
|
|
304
|
+
withOptions:(NSArray *)options
|
|
305
|
+
withResolver:(RCTPromiseResolveBlock)resolve
|
|
306
|
+
withRejecter:(RCTPromiseRejectBlock)reject)
|
|
307
|
+
{
|
|
308
|
+
NSError *error = nil;
|
|
309
|
+
[RNWhisperAudioSessionUtils setCategory:category options:options error:&error];
|
|
310
|
+
if (error != nil) {
|
|
311
|
+
reject(@"whisper_error", [NSString stringWithFormat:@"Failed to set category. Error: %@", error], nil);
|
|
312
|
+
return;
|
|
313
|
+
}
|
|
314
|
+
resolve(nil);
|
|
315
|
+
}
|
|
316
|
+
|
|
317
|
+
RCT_REMAP_METHOD(setAudioSessionMode,
|
|
318
|
+
withMode:(NSString *)mode
|
|
319
|
+
withResolver:(RCTPromiseResolveBlock)resolve
|
|
320
|
+
withRejecter:(RCTPromiseRejectBlock)reject)
|
|
321
|
+
{
|
|
322
|
+
NSError *error = nil;
|
|
323
|
+
[RNWhisperAudioSessionUtils setMode:mode error:&error];
|
|
324
|
+
if (error != nil) {
|
|
325
|
+
reject(@"whisper_error", [NSString stringWithFormat:@"Failed to set mode. Error: %@", error], nil);
|
|
326
|
+
return;
|
|
327
|
+
}
|
|
328
|
+
resolve(nil);
|
|
329
|
+
}
|
|
330
|
+
|
|
331
|
+
RCT_REMAP_METHOD(setAudioSessionActive,
|
|
332
|
+
withActive:(BOOL)active
|
|
333
|
+
withResolver:(RCTPromiseResolveBlock)resolve
|
|
334
|
+
withRejecter:(RCTPromiseRejectBlock)reject)
|
|
335
|
+
{
|
|
336
|
+
NSError *error = nil;
|
|
337
|
+
[RNWhisperAudioSessionUtils setActive:active error:&error];
|
|
338
|
+
if (error != nil) {
|
|
339
|
+
reject(@"whisper_error", [NSString stringWithFormat:@"Failed to set active. Error: %@", error], nil);
|
|
340
|
+
return;
|
|
341
|
+
}
|
|
342
|
+
resolve(nil);
|
|
343
|
+
}
|
|
344
|
+
|
|
286
345
|
#ifdef RCT_NEW_ARCH_ENABLED
|
|
287
346
|
- (std::shared_ptr<facebook::react::TurboModule>)getTurboModule:
|
|
288
347
|
(const facebook::react::ObjCTurboModule::InitParams &)params
|
|
@@ -8,6 +8,7 @@
|
|
|
8
8
|
|
|
9
9
|
/* Begin PBXBuildFile section */
|
|
10
10
|
5E555C0D2413F4C50049A1A2 /* RNWhisper.mm in Sources */ = {isa = PBXBuildFile; fileRef = B3E7B5891CC2AC0600A0062D /* RNWhisper.mm */; };
|
|
11
|
+
7F458E922AC7DC74007045F6 /* RNWhisperAudioSessionUtils.m in Sources */ = {isa = PBXBuildFile; fileRef = 7F458E912AC7DC74007045F6 /* RNWhisperAudioSessionUtils.m */; };
|
|
11
12
|
7FE0BBA12ABE6C7B0049B4E4 /* RNWhisperDownloader.m in Sources */ = {isa = PBXBuildFile; fileRef = 7FE0BB9B2ABE6C7B0049B4E4 /* RNWhisperDownloader.m */; };
|
|
12
13
|
7FE0BBA22ABE6C7B0049B4E4 /* RNWhisperAudioUtils.m in Sources */ = {isa = PBXBuildFile; fileRef = 7FE0BB9C2ABE6C7B0049B4E4 /* RNWhisperAudioUtils.m */; };
|
|
13
14
|
7FE0BBA32ABE6C7B0049B4E4 /* RNWhisperContext.mm in Sources */ = {isa = PBXBuildFile; fileRef = 7FE0BBA02ABE6C7B0049B4E4 /* RNWhisperContext.mm */; };
|
|
@@ -27,6 +28,8 @@
|
|
|
27
28
|
|
|
28
29
|
/* Begin PBXFileReference section */
|
|
29
30
|
134814201AA4EA6300B7C361 /* libRNWhisper.a */ = {isa = PBXFileReference; explicitFileType = archive.ar; includeInIndex = 0; path = libRNWhisper.a; sourceTree = BUILT_PRODUCTS_DIR; };
|
|
31
|
+
7F458E902AC7DC74007045F6 /* RNWhisperAudioSessionUtils.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = RNWhisperAudioSessionUtils.h; sourceTree = "<group>"; };
|
|
32
|
+
7F458E912AC7DC74007045F6 /* RNWhisperAudioSessionUtils.m */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.objc; path = RNWhisperAudioSessionUtils.m; sourceTree = "<group>"; };
|
|
30
33
|
7FE0BB9A2ABE6C7B0049B4E4 /* RNWhisper.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = RNWhisper.h; sourceTree = "<group>"; };
|
|
31
34
|
7FE0BB9B2ABE6C7B0049B4E4 /* RNWhisperDownloader.m */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.objc; path = RNWhisperDownloader.m; sourceTree = "<group>"; };
|
|
32
35
|
7FE0BB9C2ABE6C7B0049B4E4 /* RNWhisperAudioUtils.m */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.objc; path = RNWhisperAudioUtils.m; sourceTree = "<group>"; };
|
|
@@ -59,6 +62,8 @@
|
|
|
59
62
|
58B511D21A9E6C8500147676 = {
|
|
60
63
|
isa = PBXGroup;
|
|
61
64
|
children = (
|
|
65
|
+
7F458E902AC7DC74007045F6 /* RNWhisperAudioSessionUtils.h */,
|
|
66
|
+
7F458E912AC7DC74007045F6 /* RNWhisperAudioSessionUtils.m */,
|
|
62
67
|
7FE0BB9F2ABE6C7B0049B4E4 /* RNWhisperAudioUtils.h */,
|
|
63
68
|
7FE0BB9C2ABE6C7B0049B4E4 /* RNWhisperAudioUtils.m */,
|
|
64
69
|
7FE0BB9A2ABE6C7B0049B4E4 /* RNWhisper.h */,
|
|
@@ -132,6 +137,7 @@
|
|
|
132
137
|
7FE0BBA22ABE6C7B0049B4E4 /* RNWhisperAudioUtils.m in Sources */,
|
|
133
138
|
7FE0BBA32ABE6C7B0049B4E4 /* RNWhisperContext.mm in Sources */,
|
|
134
139
|
7FE0BBA12ABE6C7B0049B4E4 /* RNWhisperDownloader.m in Sources */,
|
|
140
|
+
7F458E922AC7DC74007045F6 /* RNWhisperAudioSessionUtils.m in Sources */,
|
|
135
141
|
);
|
|
136
142
|
runOnlyForDeploymentPostprocessing = 0;
|
|
137
143
|
};
|