whisper.rn 0.2.1 → 0.2.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/android/src/main/java/com/rnwhisper/WhisperContext.java +143 -71
- package/android/src/main/jni/whisper/jni.cpp +1 -2
- package/ios/RNWhisperContext.h +6 -3
- package/ios/RNWhisperContext.mm +100 -18
- package/lib/commonjs/index.js +53 -3
- package/lib/commonjs/index.js.map +1 -1
- package/lib/module/index.js +53 -3
- package/lib/module/index.js.map +1 -1
- package/lib/typescript/index.d.ts +26 -10
- package/lib/typescript/index.d.ts.map +1 -1
- package/package.json +1 -1
- package/src/index.tsx +85 -14
|
@@ -15,6 +15,7 @@ import android.media.AudioRecord;
|
|
|
15
15
|
import android.media.MediaRecorder.AudioSource;
|
|
16
16
|
|
|
17
17
|
import java.util.Random;
|
|
18
|
+
import java.util.ArrayList;
|
|
18
19
|
import java.lang.StringBuilder;
|
|
19
20
|
import java.io.File;
|
|
20
21
|
import java.io.BufferedReader;
|
|
@@ -41,19 +42,24 @@ public class WhisperContext {
|
|
|
41
42
|
private int id;
|
|
42
43
|
private ReactApplicationContext reactContext;
|
|
43
44
|
private long context;
|
|
44
|
-
|
|
45
|
-
private DeviceEventManagerModule.RCTDeviceEventEmitter eventEmitter;
|
|
46
|
-
|
|
47
45
|
private int jobId = -1;
|
|
46
|
+
private DeviceEventManagerModule.RCTDeviceEventEmitter eventEmitter;
|
|
47
|
+
|
|
48
48
|
private AudioRecord recorder = null;
|
|
49
49
|
private int bufferSize;
|
|
50
|
-
private short[] buffer16;
|
|
51
|
-
private int nSamples = 0;
|
|
52
50
|
private int nSamplesTranscribing = 0;
|
|
51
|
+
private ArrayList<short[]> shortBufferSlices;
|
|
52
|
+
// Remember number of samples in each slice
|
|
53
|
+
private ArrayList<Integer> sliceNSamples;
|
|
54
|
+
// Current buffer slice index
|
|
55
|
+
private int sliceIndex = 0;
|
|
56
|
+
// Current transcribing slice index
|
|
57
|
+
private int transcribeSliceIndex = 0;
|
|
58
|
+
private boolean isUseSlices = false;
|
|
59
|
+
private boolean isRealtime = false;
|
|
53
60
|
private boolean isCapturing = false;
|
|
54
61
|
private boolean isStoppedByAction = false;
|
|
55
62
|
private boolean isTranscribing = false;
|
|
56
|
-
private boolean isRealtime = false;
|
|
57
63
|
private Thread fullHandler = null;
|
|
58
64
|
|
|
59
65
|
public WhisperContext(int id, ReactApplicationContext reactContext, long context) {
|
|
@@ -64,6 +70,19 @@ public class WhisperContext {
|
|
|
64
70
|
bufferSize = AudioRecord.getMinBufferSize(SAMPLE_RATE, CHANNEL_CONFIG, AUDIO_FORMAT);
|
|
65
71
|
}
|
|
66
72
|
|
|
73
|
+
private void resetRealtimeTranscribe() {
|
|
74
|
+
shortBufferSlices = null;
|
|
75
|
+
sliceNSamples = null;
|
|
76
|
+
sliceIndex = 0;
|
|
77
|
+
transcribeSliceIndex = 0;
|
|
78
|
+
isUseSlices = false;
|
|
79
|
+
isRealtime = false;
|
|
80
|
+
isCapturing = false;
|
|
81
|
+
isStoppedByAction = false;
|
|
82
|
+
isTranscribing = false;
|
|
83
|
+
fullHandler = null;
|
|
84
|
+
}
|
|
85
|
+
|
|
67
86
|
public int startRealtimeTranscribe(int jobId, ReadableMap options) {
|
|
68
87
|
if (isCapturing || isTranscribing) {
|
|
69
88
|
return -100;
|
|
@@ -76,20 +95,25 @@ public class WhisperContext {
|
|
|
76
95
|
recorder.release();
|
|
77
96
|
return state;
|
|
78
97
|
}
|
|
79
|
-
|
|
80
|
-
int realtimeAudioSec = options.hasKey("realtimeAudioSec") ? options.getInt("realtimeAudioSec") : 0;
|
|
81
|
-
final int maxAudioSec = realtimeAudioSec > 0 ? realtimeAudioSec : DEFAULT_MAX_AUDIO_SEC;
|
|
82
98
|
|
|
83
|
-
|
|
99
|
+
resetRealtimeTranscribe();
|
|
84
100
|
|
|
85
101
|
this.jobId = jobId;
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
102
|
+
|
|
103
|
+
int realtimeAudioSec = options.hasKey("realtimeAudioSec") ? options.getInt("realtimeAudioSec") : 0;
|
|
104
|
+
final int audioSec = realtimeAudioSec > 0 ? realtimeAudioSec : DEFAULT_MAX_AUDIO_SEC;
|
|
105
|
+
|
|
106
|
+
int realtimeAudioSliceSec = options.hasKey("realtimeAudioSliceSec") ? options.getInt("realtimeAudioSliceSec") : 0;
|
|
107
|
+
final int audioSliceSec = realtimeAudioSliceSec > 0 && realtimeAudioSliceSec < audioSec ? realtimeAudioSliceSec : audioSec;
|
|
108
|
+
|
|
109
|
+
isUseSlices = audioSliceSec < audioSec;
|
|
110
|
+
|
|
111
|
+
shortBufferSlices = new ArrayList<short[]>();
|
|
112
|
+
shortBufferSlices.add(new short[audioSliceSec * SAMPLE_RATE]);
|
|
113
|
+
sliceNSamples = new ArrayList<Integer>();
|
|
114
|
+
sliceNSamples.add(0);
|
|
92
115
|
|
|
116
|
+
isCapturing = true;
|
|
93
117
|
recorder.startRecording();
|
|
94
118
|
|
|
95
119
|
new Thread(new Runnable() {
|
|
@@ -102,26 +126,56 @@ public class WhisperContext {
|
|
|
102
126
|
int n = recorder.read(buffer, 0, bufferSize);
|
|
103
127
|
if (n == 0) continue;
|
|
104
128
|
|
|
105
|
-
|
|
129
|
+
int totalNSamples = 0;
|
|
130
|
+
for (int i = 0; i < sliceNSamples.size(); i++) {
|
|
131
|
+
totalNSamples += sliceNSamples.get(i);
|
|
132
|
+
}
|
|
133
|
+
|
|
134
|
+
int nSamples = sliceNSamples.get(sliceIndex);
|
|
135
|
+
if (totalNSamples + n > audioSec * SAMPLE_RATE) {
|
|
106
136
|
// Full, stop capturing
|
|
107
137
|
isCapturing = false;
|
|
108
|
-
if (
|
|
138
|
+
if (
|
|
139
|
+
!isTranscribing &&
|
|
140
|
+
nSamples == nSamplesTranscribing &&
|
|
141
|
+
sliceIndex == transcribeSliceIndex
|
|
142
|
+
) {
|
|
109
143
|
emitTranscribeEvent("@RNWhisper_onRealtimeTranscribeEnd", Arguments.createMap());
|
|
110
|
-
} else {
|
|
111
|
-
|
|
112
|
-
fullHandler.join();
|
|
144
|
+
} else if (!isTranscribing) {
|
|
145
|
+
isTranscribing = true;
|
|
113
146
|
fullTranscribeSamples(options, true);
|
|
114
147
|
}
|
|
115
148
|
break;
|
|
116
149
|
}
|
|
117
150
|
|
|
118
151
|
// Append to buffer
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
152
|
+
short[] shortBuffer = shortBufferSlices.get(sliceIndex);
|
|
153
|
+
if (nSamples + n > audioSliceSec * SAMPLE_RATE) {
|
|
154
|
+
Log.d(NAME, "next slice");
|
|
155
|
+
|
|
156
|
+
sliceIndex++;
|
|
157
|
+
nSamples = 0;
|
|
158
|
+
shortBuffer = new short[audioSliceSec * SAMPLE_RATE];
|
|
159
|
+
shortBufferSlices.add(shortBuffer);
|
|
160
|
+
sliceNSamples.add(0);
|
|
122
161
|
}
|
|
123
162
|
|
|
124
|
-
|
|
163
|
+
for (int i = 0; i < n; i++) {
|
|
164
|
+
shortBuffer[nSamples + i] = buffer[i];
|
|
165
|
+
}
|
|
166
|
+
nSamples += n;
|
|
167
|
+
sliceNSamples.set(sliceIndex, nSamples);
|
|
168
|
+
|
|
169
|
+
if (!isTranscribing && nSamples > SAMPLE_RATE / 2) {
|
|
170
|
+
isTranscribing = true;
|
|
171
|
+
fullHandler = new Thread(new Runnable() {
|
|
172
|
+
@Override
|
|
173
|
+
public void run() {
|
|
174
|
+
fullTranscribeSamples(options, false);
|
|
175
|
+
}
|
|
176
|
+
});
|
|
177
|
+
fullHandler.start();
|
|
178
|
+
}
|
|
125
179
|
} catch (Exception e) {
|
|
126
180
|
Log.e(NAME, "Error transcribing realtime: " + e.getMessage());
|
|
127
181
|
}
|
|
@@ -132,6 +186,8 @@ public class WhisperContext {
|
|
|
132
186
|
if (fullHandler != null) {
|
|
133
187
|
fullHandler.join(); // Wait for full transcribe to finish
|
|
134
188
|
}
|
|
189
|
+
// Cleanup
|
|
190
|
+
resetRealtimeTranscribe();
|
|
135
191
|
recorder.stop();
|
|
136
192
|
} catch (Exception e) {
|
|
137
193
|
e.printStackTrace();
|
|
@@ -145,55 +201,73 @@ public class WhisperContext {
|
|
|
145
201
|
}
|
|
146
202
|
|
|
147
203
|
private void fullTranscribeSamples(ReadableMap options, boolean skipCapturingCheck) {
|
|
148
|
-
|
|
149
|
-
isTranscribing = true;
|
|
150
|
-
fullHandler = new Thread(new Runnable() {
|
|
151
|
-
@Override
|
|
152
|
-
public void run() {
|
|
153
|
-
if (!isCapturing && !skipCapturingCheck) return;
|
|
154
|
-
|
|
155
|
-
nSamplesTranscribing = nSamples;
|
|
156
|
-
|
|
157
|
-
// convert I16 to F32
|
|
158
|
-
float[] nSamplesBuffer32 = new float[nSamplesTranscribing];
|
|
159
|
-
for (int i = 0; i < nSamplesTranscribing; i++) {
|
|
160
|
-
nSamplesBuffer32[i] = buffer16[i] / 32768.0f;
|
|
161
|
-
}
|
|
204
|
+
int nSamplesOfIndex = sliceNSamples.get(transcribeSliceIndex);
|
|
162
205
|
|
|
163
|
-
|
|
206
|
+
if (!isCapturing && !skipCapturingCheck) return;
|
|
164
207
|
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
int timeEnd = (int) System.currentTimeMillis();
|
|
168
|
-
int timeRecording = (int) (nSamplesTranscribing / SAMPLE_RATE * 1000);
|
|
208
|
+
short[] shortBuffer = shortBufferSlices.get(transcribeSliceIndex);
|
|
209
|
+
int nSamples = sliceNSamples.get(transcribeSliceIndex);
|
|
169
210
|
|
|
170
|
-
|
|
171
|
-
payload.putInt("code", code);
|
|
172
|
-
payload.putInt("processTime", timeEnd - timeStart);
|
|
173
|
-
payload.putInt("recordingTime", timeRecording);
|
|
211
|
+
nSamplesTranscribing = nSamplesOfIndex;
|
|
174
212
|
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
213
|
+
// convert I16 to F32
|
|
214
|
+
float[] nSamplesBuffer32 = new float[nSamplesTranscribing];
|
|
215
|
+
for (int i = 0; i < nSamplesTranscribing; i++) {
|
|
216
|
+
nSamplesBuffer32[i] = shortBuffer[i] / 32768.0f;
|
|
217
|
+
}
|
|
180
218
|
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
219
|
+
Log.d(NAME, "Start transcribing realtime: " + nSamplesTranscribing);
|
|
220
|
+
|
|
221
|
+
int timeStart = (int) System.currentTimeMillis();
|
|
222
|
+
int code = full(jobId, options, nSamplesBuffer32, nSamplesTranscribing);
|
|
223
|
+
int timeEnd = (int) System.currentTimeMillis();
|
|
224
|
+
int timeRecording = (int) (nSamplesTranscribing / SAMPLE_RATE * 1000);
|
|
225
|
+
|
|
226
|
+
WritableMap payload = Arguments.createMap();
|
|
227
|
+
payload.putInt("code", code);
|
|
228
|
+
payload.putInt("processTime", timeEnd - timeStart);
|
|
229
|
+
payload.putInt("recordingTime", timeRecording);
|
|
230
|
+
payload.putBoolean("isUseSlices", isUseSlices);
|
|
231
|
+
payload.putInt("sliceIndex", transcribeSliceIndex);
|
|
232
|
+
|
|
233
|
+
if (code == 0) {
|
|
234
|
+
payload.putMap("data", getTextSegments());
|
|
235
|
+
} else {
|
|
236
|
+
payload.putString("error", "Transcribe failed with code " + code);
|
|
237
|
+
}
|
|
238
|
+
|
|
239
|
+
nSamplesOfIndex = sliceNSamples.get(transcribeSliceIndex);
|
|
240
|
+
if (
|
|
241
|
+
isStoppedByAction ||
|
|
242
|
+
!isCapturing &&
|
|
243
|
+
nSamplesTranscribing == nSamplesOfIndex &&
|
|
244
|
+
sliceIndex == transcribeSliceIndex
|
|
245
|
+
) {
|
|
246
|
+
payload.putBoolean("isCapturing", false);
|
|
247
|
+
payload.putBoolean("isStoppedByAction", isStoppedByAction);
|
|
248
|
+
emitTranscribeEvent("@RNWhisper_onRealtimeTranscribeEnd", payload);
|
|
249
|
+
} else if (code == 0) {
|
|
250
|
+
payload.putBoolean("isCapturing", true);
|
|
251
|
+
emitTranscribeEvent("@RNWhisper_onRealtimeTranscribe", payload);
|
|
252
|
+
} else {
|
|
253
|
+
payload.putBoolean("isCapturing", true);
|
|
254
|
+
emitTranscribeEvent("@RNWhisper_onRealtimeTranscribe", payload);
|
|
255
|
+
}
|
|
256
|
+
|
|
257
|
+
if (
|
|
258
|
+
// If no more samples on current slice, move to next slice
|
|
259
|
+
nSamplesTranscribing == sliceNSamples.get(transcribeSliceIndex) &&
|
|
260
|
+
transcribeSliceIndex != sliceIndex
|
|
261
|
+
) {
|
|
262
|
+
transcribeSliceIndex++;
|
|
263
|
+
nSamplesTranscribing = 0;
|
|
196
264
|
}
|
|
265
|
+
|
|
266
|
+
if (!isCapturing && nSamplesTranscribing != nSamplesOfIndex) {
|
|
267
|
+
// If no more capturing, continue transcribing until all slices are transcribed
|
|
268
|
+
fullTranscribeSamples(options, true);
|
|
269
|
+
}
|
|
270
|
+
isTranscribing = false;
|
|
197
271
|
}
|
|
198
272
|
|
|
199
273
|
private void emitTranscribeEvent(final String eventName, final WritableMap payload) {
|
|
@@ -221,8 +295,6 @@ public class WhisperContext {
|
|
|
221
295
|
return fullTranscribe(
|
|
222
296
|
jobId,
|
|
223
297
|
context,
|
|
224
|
-
// jboolean realtime,
|
|
225
|
-
isRealtime,
|
|
226
298
|
// float[] audio_data,
|
|
227
299
|
audioData,
|
|
228
300
|
// jint audio_data_len,
|
|
@@ -273,6 +345,7 @@ public class WhisperContext {
|
|
|
273
345
|
builder.append(text);
|
|
274
346
|
|
|
275
347
|
WritableMap segment = Arguments.createMap();
|
|
348
|
+
Log.d(NAME, "getTextSegments: " + text + " " + transcribeSliceIndex);
|
|
276
349
|
segment.putString("text", text);
|
|
277
350
|
segment.putInt("t0", getTextSegmentT0(context, i));
|
|
278
351
|
segment.putInt("t1", getTextSegmentT1(context, i));
|
|
@@ -399,7 +472,6 @@ public class WhisperContext {
|
|
|
399
472
|
protected static native int fullTranscribe(
|
|
400
473
|
int job_id,
|
|
401
474
|
long context,
|
|
402
|
-
boolean realtime,
|
|
403
475
|
float[] audio_data,
|
|
404
476
|
int audio_data_len,
|
|
405
477
|
int n_threads,
|
|
@@ -39,7 +39,6 @@ Java_com_rnwhisper_WhisperContext_fullTranscribe(
|
|
|
39
39
|
jobject thiz,
|
|
40
40
|
jint job_id,
|
|
41
41
|
jlong context_ptr,
|
|
42
|
-
jboolean realtime,
|
|
43
42
|
jfloatArray audio_data,
|
|
44
43
|
jint audio_data_len,
|
|
45
44
|
jint n_threads,
|
|
@@ -84,7 +83,7 @@ Java_com_rnwhisper_WhisperContext_fullTranscribe(
|
|
|
84
83
|
params.speed_up = speed_up;
|
|
85
84
|
params.offset_ms = 0;
|
|
86
85
|
params.no_context = true;
|
|
87
|
-
params.single_segment =
|
|
86
|
+
params.single_segment = false;
|
|
88
87
|
|
|
89
88
|
if (max_len > -1) {
|
|
90
89
|
params.max_len = max_len;
|
package/ios/RNWhisperContext.h
CHANGED
|
@@ -20,10 +20,13 @@ typedef struct {
|
|
|
20
20
|
bool isCapturing;
|
|
21
21
|
bool isStoppedByAction;
|
|
22
22
|
int maxAudioSec;
|
|
23
|
-
int nSamples;
|
|
24
23
|
int nSamplesTranscribing;
|
|
25
|
-
|
|
26
|
-
|
|
24
|
+
NSMutableArray<NSValue *> *shortBufferSlices;
|
|
25
|
+
NSMutableArray<NSNumber *> *sliceNSamples;
|
|
26
|
+
bool isUseSlices;
|
|
27
|
+
int sliceIndex;
|
|
28
|
+
int transcribeSliceIndex;
|
|
29
|
+
int audioSliceSec;
|
|
27
30
|
|
|
28
31
|
AudioQueueRef queue;
|
|
29
32
|
AudioStreamBasicDescription dataFormat;
|
package/ios/RNWhisperContext.mm
CHANGED
|
@@ -27,21 +27,47 @@
|
|
|
27
27
|
self->recordState.dataFormat.mReserved = 0;
|
|
28
28
|
self->recordState.dataFormat.mFormatFlags = kLinearPCMFormatFlagIsSignedInteger;
|
|
29
29
|
|
|
30
|
-
self->recordState.nSamples = 0;
|
|
31
|
-
|
|
32
30
|
int maxAudioSecOpt = options[@"realtimeAudioSec"] != nil ? [options[@"realtimeAudioSec"] intValue] : 0;
|
|
33
31
|
int maxAudioSec = maxAudioSecOpt > 0 ? maxAudioSecOpt : DEFAULT_MAX_AUDIO_SEC;
|
|
34
32
|
self->recordState.maxAudioSec = maxAudioSec;
|
|
35
|
-
|
|
36
|
-
|
|
33
|
+
|
|
34
|
+
int realtimeAudioSliceSec = options[@"realtimeAudioSliceSec"] != nil ? [options[@"realtimeAudioSliceSec"] intValue] : 0;
|
|
35
|
+
int audioSliceSec = realtimeAudioSliceSec > 0 && realtimeAudioSliceSec < maxAudioSec ? realtimeAudioSliceSec : maxAudioSec;
|
|
36
|
+
|
|
37
|
+
self->recordState.audioSliceSec = audioSliceSec;
|
|
38
|
+
self->recordState.isUseSlices = audioSliceSec < maxAudioSec;
|
|
39
|
+
|
|
40
|
+
self->recordState.sliceIndex = 0;
|
|
41
|
+
self->recordState.transcribeSliceIndex = 0;
|
|
42
|
+
self->recordState.nSamplesTranscribing = 0;
|
|
43
|
+
|
|
44
|
+
[self freeBufferIfNeeded];
|
|
45
|
+
self->recordState.shortBufferSlices = [NSMutableArray new];
|
|
46
|
+
|
|
47
|
+
int16_t *audioBufferI16 = (int16_t *) malloc(audioSliceSec * WHISPER_SAMPLE_RATE * sizeof(int16_t));
|
|
48
|
+
[self->recordState.shortBufferSlices addObject:[NSValue valueWithPointer:audioBufferI16]];
|
|
49
|
+
|
|
50
|
+
self->recordState.sliceNSamples = [NSMutableArray new];
|
|
51
|
+
[self->recordState.sliceNSamples addObject:[NSNumber numberWithInt:0]];
|
|
37
52
|
|
|
38
53
|
self->recordState.isRealtime = true;
|
|
39
54
|
self->recordState.isTranscribing = false;
|
|
40
55
|
self->recordState.isCapturing = false;
|
|
56
|
+
self->recordState.isStoppedByAction = false;
|
|
41
57
|
|
|
42
58
|
self->recordState.mSelf = self;
|
|
43
59
|
}
|
|
44
60
|
|
|
61
|
+
- (void)freeBufferIfNeeded {
|
|
62
|
+
if (self->recordState.shortBufferSlices != nil) {
|
|
63
|
+
for (int i = 0; i < [self->recordState.shortBufferSlices count]; i++) {
|
|
64
|
+
int16_t *audioBufferI16 = (int16_t *) [self->recordState.shortBufferSlices[i] pointerValue];
|
|
65
|
+
free(audioBufferI16);
|
|
66
|
+
}
|
|
67
|
+
self->recordState.shortBufferSlices = nil;
|
|
68
|
+
}
|
|
69
|
+
}
|
|
70
|
+
|
|
45
71
|
void AudioInputCallback(void * inUserData,
|
|
46
72
|
AudioQueueRef inAQ,
|
|
47
73
|
AudioQueueBufferRef inBuffer,
|
|
@@ -59,16 +85,29 @@ void AudioInputCallback(void * inUserData,
|
|
|
59
85
|
return;
|
|
60
86
|
}
|
|
61
87
|
|
|
88
|
+
int totalNSamples = 0;
|
|
89
|
+
for (int i = 0; i < [state->sliceNSamples count]; i++) {
|
|
90
|
+
totalNSamples += [[state->sliceNSamples objectAtIndex:i] intValue];
|
|
91
|
+
}
|
|
92
|
+
|
|
62
93
|
const int n = inBuffer->mAudioDataByteSize / 2;
|
|
63
|
-
NSLog(@"[RNWhisper] Captured %d new samples", n);
|
|
64
94
|
|
|
65
|
-
|
|
95
|
+
int nSamples = [state->sliceNSamples[state->sliceIndex] intValue];
|
|
96
|
+
|
|
97
|
+
if (totalNSamples + n > state->maxAudioSec * WHISPER_SAMPLE_RATE) {
|
|
66
98
|
NSLog(@"[RNWhisper] Audio buffer is full, stop capturing");
|
|
67
99
|
state->isCapturing = false;
|
|
68
100
|
[state->mSelf stopAudio];
|
|
69
|
-
if (
|
|
101
|
+
if (
|
|
102
|
+
!state->isTranscribing &&
|
|
103
|
+
nSamples == state->nSamplesTranscribing &&
|
|
104
|
+
state->sliceIndex == state->transcribeSliceIndex
|
|
105
|
+
) {
|
|
70
106
|
state->transcribeHandler(state->jobId, @"end", @{});
|
|
71
|
-
} else if (
|
|
107
|
+
} else if (
|
|
108
|
+
!state->isTranscribing &&
|
|
109
|
+
nSamples != state->nSamplesTranscribing
|
|
110
|
+
) {
|
|
72
111
|
state->isTranscribing = true;
|
|
73
112
|
dispatch_async(dispatch_get_global_queue(DISPATCH_QUEUE_PRIORITY_DEFAULT, 0), ^{
|
|
74
113
|
[state->mSelf fullTranscribeSamples:state];
|
|
@@ -77,10 +116,25 @@ void AudioInputCallback(void * inUserData,
|
|
|
77
116
|
return;
|
|
78
117
|
}
|
|
79
118
|
|
|
119
|
+
int audioSliceSec = state->audioSliceSec;
|
|
120
|
+
if (nSamples + n > audioSliceSec * WHISPER_SAMPLE_RATE) {
|
|
121
|
+
// next slice
|
|
122
|
+
state->sliceIndex++;
|
|
123
|
+
nSamples = 0;
|
|
124
|
+
int16_t* audioBufferI16 = (int16_t*) malloc(audioSliceSec * WHISPER_SAMPLE_RATE * sizeof(int16_t));
|
|
125
|
+
[state->shortBufferSlices addObject:[NSValue valueWithPointer:audioBufferI16]];
|
|
126
|
+
[state->sliceNSamples addObject:[NSNumber numberWithInt:0]];
|
|
127
|
+
}
|
|
128
|
+
|
|
129
|
+
// Append to buffer
|
|
130
|
+
NSLog(@"[RNWhisper] Slice %d has %d samples", state->sliceIndex, nSamples);
|
|
131
|
+
|
|
132
|
+
int16_t* audioBufferI16 = (int16_t*) [state->shortBufferSlices[state->sliceIndex] pointerValue];
|
|
80
133
|
for (int i = 0; i < n; i++) {
|
|
81
|
-
|
|
134
|
+
audioBufferI16[nSamples + i] = ((short*)inBuffer->mAudioData)[i];
|
|
82
135
|
}
|
|
83
|
-
|
|
136
|
+
nSamples += n;
|
|
137
|
+
state->sliceNSamples[state->sliceIndex] = [NSNumber numberWithInt:nSamples];
|
|
84
138
|
|
|
85
139
|
AudioQueueEnqueueBuffer(state->queue, inBuffer, 0, NULL);
|
|
86
140
|
|
|
@@ -93,15 +147,19 @@ void AudioInputCallback(void * inUserData,
|
|
|
93
147
|
}
|
|
94
148
|
|
|
95
149
|
- (void)fullTranscribeSamples:(RNWhisperContextRecordState*) state {
|
|
96
|
-
|
|
150
|
+
int nSamplesOfIndex = [[state->sliceNSamples objectAtIndex:state->transcribeSliceIndex] intValue];
|
|
151
|
+
state->nSamplesTranscribing = nSamplesOfIndex;
|
|
97
152
|
NSLog(@"[RNWhisper] Transcribing %d samples", state->nSamplesTranscribing);
|
|
98
153
|
|
|
154
|
+
int16_t* audioBufferI16 = (int16_t*) [state->shortBufferSlices[state->transcribeSliceIndex] pointerValue];
|
|
155
|
+
float* audioBufferF32 = (float*) malloc(state->nSamplesTranscribing * sizeof(float));
|
|
99
156
|
// convert I16 to F32
|
|
100
157
|
for (int i = 0; i < state->nSamplesTranscribing; i++) {
|
|
101
|
-
|
|
158
|
+
audioBufferF32[i] = (float)audioBufferI16[i] / 32768.0f;
|
|
102
159
|
}
|
|
103
160
|
CFTimeInterval timeStart = CACurrentMediaTime();
|
|
104
|
-
int code = [state->mSelf fullTranscribe:state->jobId audioData:
|
|
161
|
+
int code = [state->mSelf fullTranscribe:state->jobId audioData:audioBufferF32 audioDataCount:state->nSamplesTranscribing options:state->options];
|
|
162
|
+
free(audioBufferF32);
|
|
105
163
|
CFTimeInterval timeEnd = CACurrentMediaTime();
|
|
106
164
|
const float timeRecording = (float) state->nSamplesTranscribing / (float) state->dataFormat.mSampleRate;
|
|
107
165
|
|
|
@@ -109,7 +167,10 @@ void AudioInputCallback(void * inUserData,
|
|
|
109
167
|
@"code": [NSNumber numberWithInt:code],
|
|
110
168
|
@"processTime": [NSNumber numberWithInt:(timeEnd - timeStart) * 1E3],
|
|
111
169
|
@"recordingTime": [NSNumber numberWithInt:timeRecording * 1E3],
|
|
170
|
+
@"isUseSlices": @(state->isUseSlices),
|
|
171
|
+
@"sliceIndex": @(state->transcribeSliceIndex),
|
|
112
172
|
};
|
|
173
|
+
|
|
113
174
|
NSMutableDictionary* result = [base mutableCopy];
|
|
114
175
|
|
|
115
176
|
if (code == 0) {
|
|
@@ -118,7 +179,15 @@ void AudioInputCallback(void * inUserData,
|
|
|
118
179
|
result[@"error"] = [NSString stringWithFormat:@"Transcribe failed with code %d", code];
|
|
119
180
|
}
|
|
120
181
|
|
|
121
|
-
|
|
182
|
+
nSamplesOfIndex = [[state->sliceNSamples objectAtIndex:state->transcribeSliceIndex] intValue];
|
|
183
|
+
if (
|
|
184
|
+
state->isStoppedByAction ||
|
|
185
|
+
(
|
|
186
|
+
!state->isCapturing &&
|
|
187
|
+
state->nSamplesTranscribing == nSamplesOfIndex &&
|
|
188
|
+
state->sliceIndex == state->transcribeSliceIndex
|
|
189
|
+
)
|
|
190
|
+
) {
|
|
122
191
|
NSLog(@"[RNWhisper] Transcribe end");
|
|
123
192
|
result[@"isStoppedByAction"] = @(state->isStoppedByAction);
|
|
124
193
|
result[@"isCapturing"] = @(false);
|
|
@@ -130,13 +199,25 @@ void AudioInputCallback(void * inUserData,
|
|
|
130
199
|
result[@"isCapturing"] = @(true);
|
|
131
200
|
state->transcribeHandler(state->jobId, @"transcribe", result);
|
|
132
201
|
}
|
|
133
|
-
state->isTranscribing = false;
|
|
134
202
|
|
|
135
|
-
if (
|
|
203
|
+
if (
|
|
204
|
+
// If no more samples on current slice, move to next slice
|
|
205
|
+
state->nSamplesTranscribing == nSamplesOfIndex &&
|
|
206
|
+
state->transcribeSliceIndex != state->sliceIndex
|
|
207
|
+
) {
|
|
208
|
+
state->transcribeSliceIndex++;
|
|
209
|
+
state->nSamplesTranscribing = 0;
|
|
210
|
+
}
|
|
211
|
+
|
|
212
|
+
if (
|
|
213
|
+
!state->isCapturing &&
|
|
214
|
+
state->nSamplesTranscribing != nSamplesOfIndex
|
|
215
|
+
) {
|
|
136
216
|
state->isTranscribing = true;
|
|
137
217
|
// Finish transcribing the rest of the samples
|
|
138
218
|
[self fullTranscribeSamples:state];
|
|
139
219
|
}
|
|
220
|
+
state->isTranscribing = false;
|
|
140
221
|
}
|
|
141
222
|
|
|
142
223
|
- (bool)isCapturing {
|
|
@@ -154,7 +235,6 @@ void AudioInputCallback(void * inUserData,
|
|
|
154
235
|
self->recordState.transcribeHandler = onTranscribe;
|
|
155
236
|
self->recordState.jobId = jobId;
|
|
156
237
|
[self prepareRealtime:options];
|
|
157
|
-
self->recordState.nSamples = 0;
|
|
158
238
|
|
|
159
239
|
OSStatus status = AudioQueueNewInput(
|
|
160
240
|
&self->recordState.dataFormat,
|
|
@@ -205,6 +285,7 @@ void AudioInputCallback(void * inUserData,
|
|
|
205
285
|
if (!self->recordState.isRealtime || !self->recordState.isCapturing) {
|
|
206
286
|
return;
|
|
207
287
|
}
|
|
288
|
+
self->recordState.isTranscribing = false;
|
|
208
289
|
self->recordState.isCapturing = false;
|
|
209
290
|
self->recordState.isStoppedByAction = true;
|
|
210
291
|
[self stopAudio];
|
|
@@ -239,7 +320,7 @@ void AudioInputCallback(void * inUserData,
|
|
|
239
320
|
params.n_threads = max_threads;
|
|
240
321
|
params.offset_ms = 0;
|
|
241
322
|
params.no_context = true;
|
|
242
|
-
params.single_segment =
|
|
323
|
+
params.single_segment = false;
|
|
243
324
|
|
|
244
325
|
if (options[@"maxLen"] != nil) {
|
|
245
326
|
params.max_len = [options[@"maxLen"] intValue];
|
|
@@ -321,6 +402,7 @@ void AudioInputCallback(void * inUserData,
|
|
|
321
402
|
- (void)invalidate {
|
|
322
403
|
[self stopCurrentTranscribe];
|
|
323
404
|
whisper_free(self->ctx);
|
|
405
|
+
[self freeBufferIfNeeded];
|
|
324
406
|
}
|
|
325
407
|
|
|
326
408
|
@end
|
package/lib/commonjs/index.js
CHANGED
|
@@ -45,6 +45,51 @@ class WhisperContext {
|
|
|
45
45
|
const jobId = Math.floor(Math.random() * 10000);
|
|
46
46
|
await RNWhisper.startRealtimeTranscribe(this.id, jobId, options);
|
|
47
47
|
let lastTranscribePayload;
|
|
48
|
+
const slices = [];
|
|
49
|
+
let sliceIndex = 0;
|
|
50
|
+
let tOffset = 0;
|
|
51
|
+
const putSlice = payload => {
|
|
52
|
+
if (!payload.isUseSlices) return;
|
|
53
|
+
if (sliceIndex !== payload.sliceIndex) {
|
|
54
|
+
var _slices$sliceIndex, _segments;
|
|
55
|
+
const {
|
|
56
|
+
segments = []
|
|
57
|
+
} = ((_slices$sliceIndex = slices[sliceIndex]) === null || _slices$sliceIndex === void 0 ? void 0 : _slices$sliceIndex.data) || {};
|
|
58
|
+
tOffset = ((_segments = segments[segments.length - 1]) === null || _segments === void 0 ? void 0 : _segments.t1) || 0;
|
|
59
|
+
}
|
|
60
|
+
({
|
|
61
|
+
sliceIndex
|
|
62
|
+
} = payload);
|
|
63
|
+
slices[sliceIndex] = {
|
|
64
|
+
...payload,
|
|
65
|
+
data: payload.data ? {
|
|
66
|
+
...payload.data,
|
|
67
|
+
segments: payload.data.segments.map(segment => ({
|
|
68
|
+
...segment,
|
|
69
|
+
t0: segment.t0 + tOffset,
|
|
70
|
+
t1: segment.t1 + tOffset
|
|
71
|
+
})) || []
|
|
72
|
+
} : undefined
|
|
73
|
+
};
|
|
74
|
+
};
|
|
75
|
+
const mergeSlicesIfNeeded = payload => {
|
|
76
|
+
if (!payload.isUseSlices) return payload;
|
|
77
|
+
const mergedPayload = {};
|
|
78
|
+
slices.forEach(slice => {
|
|
79
|
+
var _mergedPayload$data, _slice$data, _mergedPayload$data2, _slice$data2;
|
|
80
|
+
mergedPayload.data = {
|
|
81
|
+
result: (((_mergedPayload$data = mergedPayload.data) === null || _mergedPayload$data === void 0 ? void 0 : _mergedPayload$data.result) || '') + (((_slice$data = slice.data) === null || _slice$data === void 0 ? void 0 : _slice$data.result) || ''),
|
|
82
|
+
segments: [...((mergedPayload === null || mergedPayload === void 0 ? void 0 : (_mergedPayload$data2 = mergedPayload.data) === null || _mergedPayload$data2 === void 0 ? void 0 : _mergedPayload$data2.segments) || []), ...(((_slice$data2 = slice.data) === null || _slice$data2 === void 0 ? void 0 : _slice$data2.segments) || [])]
|
|
83
|
+
};
|
|
84
|
+
mergedPayload.processTime = slice.processTime;
|
|
85
|
+
mergedPayload.recordingTime = ((mergedPayload === null || mergedPayload === void 0 ? void 0 : mergedPayload.recordingTime) || 0) + slice.recordingTime;
|
|
86
|
+
});
|
|
87
|
+
return {
|
|
88
|
+
...payload,
|
|
89
|
+
...mergedPayload,
|
|
90
|
+
slices
|
|
91
|
+
};
|
|
92
|
+
};
|
|
48
93
|
return {
|
|
49
94
|
stop: () => RNWhisper.abortTranscribe(this.id, jobId),
|
|
50
95
|
subscribe: callback => {
|
|
@@ -55,10 +100,11 @@ class WhisperContext {
|
|
|
55
100
|
} = evt;
|
|
56
101
|
if (contextId !== this.id || evt.jobId !== jobId) return;
|
|
57
102
|
lastTranscribePayload = payload;
|
|
103
|
+
putSlice(payload);
|
|
58
104
|
callback({
|
|
59
105
|
contextId,
|
|
60
106
|
jobId: evt.jobId,
|
|
61
|
-
...payload
|
|
107
|
+
...mergeSlicesIfNeeded(payload)
|
|
62
108
|
});
|
|
63
109
|
});
|
|
64
110
|
const endListener = EventEmitter.addListener(EVENT_ON_REALTIME_TRANSCRIBE_END, evt => {
|
|
@@ -67,11 +113,15 @@ class WhisperContext {
|
|
|
67
113
|
payload
|
|
68
114
|
} = evt;
|
|
69
115
|
if (contextId !== this.id || evt.jobId !== jobId) return;
|
|
116
|
+
const lastPayload = {
|
|
117
|
+
...lastTranscribePayload,
|
|
118
|
+
...payload
|
|
119
|
+
};
|
|
120
|
+
putSlice(lastPayload);
|
|
70
121
|
callback({
|
|
71
122
|
contextId,
|
|
72
123
|
jobId: evt.jobId,
|
|
73
|
-
...
|
|
74
|
-
...payload,
|
|
124
|
+
...mergeSlicesIfNeeded(lastPayload),
|
|
75
125
|
isCapturing: false
|
|
76
126
|
});
|
|
77
127
|
transcribeListener.remove();
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"names":["_reactNative","require","LINKING_ERROR","Platform","select","ios","default","RNWhisper","NativeModules","Proxy","get","Error","EventEmitter","OS","NativeEventEmitter","DeviceEventEmitter","EVENT_ON_REALTIME_TRANSCRIBE","EVENT_ON_REALTIME_TRANSCRIBE_END","WhisperContext","constructor","id","transcribe","path","options","arguments","length","undefined","jobId","Math","floor","random","stop","abortTranscribe","promise","transcribeFile","transcribeRealtime","startRealtimeTranscribe","lastTranscribePayload","subscribe","callback","transcribeListener","addListener","evt","contextId","
|
|
1
|
+
{"version":3,"names":["_reactNative","require","LINKING_ERROR","Platform","select","ios","default","RNWhisper","NativeModules","Proxy","get","Error","EventEmitter","OS","NativeEventEmitter","DeviceEventEmitter","EVENT_ON_REALTIME_TRANSCRIBE","EVENT_ON_REALTIME_TRANSCRIBE_END","WhisperContext","constructor","id","transcribe","path","options","arguments","length","undefined","jobId","Math","floor","random","stop","abortTranscribe","promise","transcribeFile","transcribeRealtime","startRealtimeTranscribe","lastTranscribePayload","slices","sliceIndex","tOffset","putSlice","payload","isUseSlices","_slices$sliceIndex","_segments","segments","data","t1","map","segment","t0","mergeSlicesIfNeeded","mergedPayload","forEach","slice","_mergedPayload$data","_slice$data","_mergedPayload$data2","_slice$data2","result","processTime","recordingTime","subscribe","callback","transcribeListener","addListener","evt","contextId","endListener","lastPayload","isCapturing","remove","release","releaseContext","initWhisper","filePath","initContext","releaseAllWhisper","releaseAllContexts"],"sourceRoot":"../../src","sources":["index.tsx"],"mappings":";;;;;;;AAAA,IAAAA,YAAA,GAAAC,OAAA;AAQA,MAAMC,aAAa,GAChB,sEAAqEC,qBAAQ,CAACC,MAAM,CAAC;EAAEC,GAAG,EAAE,gCAAgC;EAAEC,OAAO,EAAE;AAAG,CAAC,CAC3I,oDAAmD;AAEtD,MAAMC,SAAS,GAAGC,0BAAa,CAACD,SAAS,GACrCC,0BAAa,CAACD,SAAS,GACvB,IAAIE,KAAK,CACT,CAAC,CAAC,EACF;EACEC,GAAGA,CAAA,EAAG;IACJ,MAAM,IAAIC,KAAK,CAACT,aAAa,CAAC;EAChC;AACF,CAAC,CACF;AAEH,IAAIU,YAA2D;AAC/D,IAAIT,qBAAQ,CAACU,EAAE,KAAK,KAAK,EAAE;EACzBD,YAAY,GAAG,IAAIE,+BAAkB,CAACP,SAAS,CAAC;AAClD;AACA,IAAIJ,qBAAQ,CAACU,EAAE,KAAK,SAAS,EAAE;EAC7BD,YAAY,GAAGG,+BAAkB;AACnC;AAEA,MAAMC,4BAA4B,GAAG,iCAAiC;AACtE,MAAMC,gCAAgC,GAAG,oCAAoC;AAiG7E,MAAMC,cAAc,CAAC;EAGnBC,WAAWA,CAACC,EAAU,EAAE;IACtB,IAAI,CAACA,EAAE,GAAGA,EAAE;EACd;;EAEA;EACAC,UAAUA,CAACC,IAAY,EAKrB;IAAA,IALuBC,OAA0B,GAAAC,SAAA,CAAAC,MAAA,QAAAD,SAAA,QAAAE,SAAA,GAAAF,SAAA,MAAG,CAAC,CAAC;IAMtD,MAAMG,KAAa,GAAGC,IAAI,CAACC,KAAK,CAACD,IAAI,CAACE,MAAM,EAAE,GAAG
,KAAK,CAAC;IACvD,OAAO;MACLC,IAAI,EAAEA,CAAA,KAAMxB,SAAS,CAACyB,eAAe,CAAC,IAAI,CAACZ,EAAE,EAAEO,KAAK,CAAC;MACrDM,OAAO,EAAE1B,SAAS,CAAC2B,cAAc,CAAC,IAAI,CAACd,EAAE,EAAEO,KAAK,EAAEL,IAAI,EAAEC,OAAO;IACjE,CAAC;EACH;;EAEA;EACA,MAAMY,kBAAkBA,CAAA,EAKrB;IAAA,IALsBZ,OAAkC,GAAAC,SAAA,CAAAC,MAAA,QAAAD,SAAA,QAAAE,SAAA,GAAAF,SAAA,MAAG,CAAC,CAAC;IAM9D,MAAMG,KAAa,GAAGC,IAAI,CAACC,KAAK,CAACD,IAAI,CAACE,MAAM,EAAE,GAAG,KAAK,CAAC;IACvD,MAAMvB,SAAS,CAAC6B,uBAAuB,CAAC,IAAI,CAAChB,EAAE,EAAEO,KAAK,EAAEJ,OAAO,CAAC;IAChE,IAAIc,qBAAsD;IAE1D,MAAMC,MAAyC,GAAG,EAAE;IACpD,IAAIC,UAAkB,GAAG,CAAC;IAC1B,IAAIC,OAAe,GAAG,CAAC;IAEvB,MAAMC,QAAQ,GAAIC,OAAwC,IAAK;MAC7D,IAAI,CAACA,OAAO,CAACC,WAAW,EAAE;MAC1B,IAAIJ,UAAU,KAAKG,OAAO,CAACH,UAAU,EAAE;QAAA,IAAAK,kBAAA,EAAAC,SAAA;QACrC,MAAM;UAAEC,QAAQ,GAAG;QAAG,CAAC,GAAG,EAAAF,kBAAA,GAAAN,MAAM,CAACC,UAAU,CAAC,cAAAK,kBAAA,uBAAlBA,kBAAA,CAAoBG,IAAI,KAAI,CAAC,CAAC;QACxDP,OAAO,GAAG,EAAAK,SAAA,GAAAC,QAAQ,CAACA,QAAQ,CAACrB,MAAM,GAAG,CAAC,CAAC,cAAAoB,SAAA,uBAA7BA,SAAA,CAA+BG,EAAE,KAAI,CAAC;MAClD;MACA,CAAC;QAAET;MAAW,CAAC,GAAGG,OAAO;MACzBJ,MAAM,CAACC,UAAU,CAAC,GAAG;QACnB,GAAGG,OAAO;QACVK,IAAI,EAAEL,OAAO,CAACK,IAAI,GAAG;UACnB,GAAGL,OAAO,CAACK,IAAI;UACfD,QAAQ,EAAEJ,OAAO,CAACK,IAAI,CAACD,QAAQ,CAACG,GAAG,CAAEC,OAAO,KAAM;YAChD,GAAGA,OAAO;YACVC,EAAE,EAAED,OAAO,CAACC,EAAE,GAAGX,OAAO;YACxBQ,EAAE,EAAEE,OAAO,CAACF,EAAE,GAAGR;UACnB,CAAC,CAAC,CAAC,IAAI;QACT,CAAC,GAAGd;MACN,CAAC;IACH,CAAC;IAED,MAAM0B,mBAAmB,GAAIV,OAAwC,IAAsC;MACzG,IAAI,CAACA,OAAO,CAACC,WAAW,EAAE,OAAOD,OAAO;MAExC,MAAMW,aAAkB,GAAG,CAAC,CAAC;MAC7Bf,MAAM,CAACgB,OAAO,CACXC,KAAK,IAAK;QAAA,IAAAC,mBAAA,EAAAC,WAAA,EAAAC,oBAAA,EAAAC,YAAA;QACTN,aAAa,CAACN,IAAI,GAAG;UACnBa,MAAM,EAAE,CAAC,EAAAJ,mBAAA,GAAAH,aAAa,CAACN,IAAI,cAAAS,mBAAA,uBAAlBA,mBAAA,CAAoBI,MAAM,KAAI,EAAE,KAAK,EAAAH,WAAA,GAAAF,KAAK,CAACR,IAAI,cAAAU,WAAA,uBAAVA,WAAA,CAAYG,MAAM,KAAI,EAAE,CAAC;UACvEd,QAAQ,EAAE,CACR,IAAI,CAAAO,aAAa,aAAbA,aAAa,wBAAAK,oBAAA,GAAbL,aAAa,CAAEN,IAAI,cAAAW,oBAAA,uBAAnBA,oBAAA,CAAqBZ,QAAQ,KAAI,EAAE,CAAC,EACxC,IAAI,EAAAa,YAAA,GAAAJ,KAAK,CAACR,IA
AI,cAAAY,YAAA,uBAAVA,YAAA,CAAYb,QAAQ,KAAI,EAAE,CAAC;QAEnC,CAAC;QACDO,aAAa,CAACQ,WAAW,GAAGN,KAAK,CAACM,WAAW;QAC7CR,aAAa,CAACS,aAAa,GAAG,CAAC,CAAAT,aAAa,aAAbA,aAAa,uBAAbA,aAAa,CAAES,aAAa,KAAI,CAAC,IAAIP,KAAK,CAACO,aAAa;MACzF,CAAC,CACF;MACD,OAAO;QAAE,GAAGpB,OAAO;QAAE,GAAGW,aAAa;QAAEf;MAAO,CAAC;IACjD,CAAC;IAED,OAAO;MACLP,IAAI,EAAEA,CAAA,KAAMxB,SAAS,CAACyB,eAAe,CAAC,IAAI,CAACZ,EAAE,EAAEO,KAAK,CAAC;MACrDoC,SAAS,EAAGC,QAAkD,IAAK;QACjE,MAAMC,kBAAkB,GAAGrD,YAAY,CAACsD,WAAW,CACjDlD,4BAA4B,EAC3BmD,GAAkC,IAAK;UACtC,MAAM;YAAEC,SAAS;YAAE1B;UAAQ,CAAC,GAAGyB,GAAG;UAClC,IAAIC,SAAS,KAAK,IAAI,CAAChD,EAAE,IAAI+C,GAAG,CAACxC,KAAK,KAAKA,KAAK,EAAE;UAClDU,qBAAqB,GAAGK,OAAO;UAC/BD,QAAQ,CAACC,OAAO,CAAC;UACjBsB,QAAQ,CAAC;YACPI,SAAS;YACTzC,KAAK,EAAEwC,GAAG,CAACxC,KAAK;YAChB,GAAGyB,mBAAmB,CAACV,OAAO;UAChC,CAAC,CAAC;QACJ,CAAC,CACF;QACD,MAAM2B,WAAW,GAAGzD,YAAY,CAACsD,WAAW,CAC1CjD,gCAAgC,EAC/BkD,GAAkC,IAAK;UACtC,MAAM;YAAEC,SAAS;YAAE1B;UAAQ,CAAC,GAAGyB,GAAG;UAClC,IAAIC,SAAS,KAAK,IAAI,CAAChD,EAAE,IAAI+C,GAAG,CAACxC,KAAK,KAAKA,KAAK,EAAE;UAClD,MAAM2C,WAAW,GAAG;YAClB,GAAGjC,qBAAqB;YACxB,GAAGK;UACL,CAAC;UACDD,QAAQ,CAAC6B,WAAW,CAAC;UACrBN,QAAQ,CAAC;YACPI,SAAS;YACTzC,KAAK,EAAEwC,GAAG,CAACxC,KAAK;YAChB,GAAGyB,mBAAmB,CAACkB,WAAW,CAAC;YACnCC,WAAW,EAAE;UACf,CAAC,CAAC;UACFN,kBAAkB,CAACO,MAAM,EAAE;UAC3BH,WAAW,CAACG,MAAM,EAAE;QACtB,CAAC,CACF;MACH;IACF,CAAC;EACH;EAEA,MAAMC,OAAOA,CAAA,EAAG;IACd,OAAOlE,SAAS,CAACmE,cAAc,CAAC,IAAI,CAACtD,EAAE,CAAC;EAC1C;AACF;AAEO,eAAeuD,WAAWA,CAAA,EAEN;EAAA,IADzB;IAAEC;EAAgC,CAAC,GAAApD,SAAA,CAAAC,MAAA,QAAAD,SAAA,QAAAE,SAAA,GAAAF,SAAA,MAAG,CAAC,CAAC;EAExC,MAAMJ,EAAE,GAAG,MAAMb,SAAS,CAACsE,WAAW,CAACD,QAAQ,CAAC;EAChD,OAAO,IAAI1D,cAAc,CAACE,EAAE,CAAC;AAC/B;AAEO,eAAe0D,iBAAiBA,CAAA,EAAkB;EACvD,OAAOvE,SAAS,CAACwE,kBAAkB,EAAE;AACvC"}
|
package/lib/module/index.js
CHANGED
|
@@ -38,6 +38,51 @@ class WhisperContext {
|
|
|
38
38
|
const jobId = Math.floor(Math.random() * 10000);
|
|
39
39
|
await RNWhisper.startRealtimeTranscribe(this.id, jobId, options);
|
|
40
40
|
let lastTranscribePayload;
|
|
41
|
+
const slices = [];
|
|
42
|
+
let sliceIndex = 0;
|
|
43
|
+
let tOffset = 0;
|
|
44
|
+
const putSlice = payload => {
|
|
45
|
+
if (!payload.isUseSlices) return;
|
|
46
|
+
if (sliceIndex !== payload.sliceIndex) {
|
|
47
|
+
var _slices$sliceIndex, _segments;
|
|
48
|
+
const {
|
|
49
|
+
segments = []
|
|
50
|
+
} = ((_slices$sliceIndex = slices[sliceIndex]) === null || _slices$sliceIndex === void 0 ? void 0 : _slices$sliceIndex.data) || {};
|
|
51
|
+
tOffset = ((_segments = segments[segments.length - 1]) === null || _segments === void 0 ? void 0 : _segments.t1) || 0;
|
|
52
|
+
}
|
|
53
|
+
({
|
|
54
|
+
sliceIndex
|
|
55
|
+
} = payload);
|
|
56
|
+
slices[sliceIndex] = {
|
|
57
|
+
...payload,
|
|
58
|
+
data: payload.data ? {
|
|
59
|
+
...payload.data,
|
|
60
|
+
segments: payload.data.segments.map(segment => ({
|
|
61
|
+
...segment,
|
|
62
|
+
t0: segment.t0 + tOffset,
|
|
63
|
+
t1: segment.t1 + tOffset
|
|
64
|
+
})) || []
|
|
65
|
+
} : undefined
|
|
66
|
+
};
|
|
67
|
+
};
|
|
68
|
+
const mergeSlicesIfNeeded = payload => {
|
|
69
|
+
if (!payload.isUseSlices) return payload;
|
|
70
|
+
const mergedPayload = {};
|
|
71
|
+
slices.forEach(slice => {
|
|
72
|
+
var _mergedPayload$data, _slice$data, _mergedPayload$data2, _slice$data2;
|
|
73
|
+
mergedPayload.data = {
|
|
74
|
+
result: (((_mergedPayload$data = mergedPayload.data) === null || _mergedPayload$data === void 0 ? void 0 : _mergedPayload$data.result) || '') + (((_slice$data = slice.data) === null || _slice$data === void 0 ? void 0 : _slice$data.result) || ''),
|
|
75
|
+
segments: [...((mergedPayload === null || mergedPayload === void 0 ? void 0 : (_mergedPayload$data2 = mergedPayload.data) === null || _mergedPayload$data2 === void 0 ? void 0 : _mergedPayload$data2.segments) || []), ...(((_slice$data2 = slice.data) === null || _slice$data2 === void 0 ? void 0 : _slice$data2.segments) || [])]
|
|
76
|
+
};
|
|
77
|
+
mergedPayload.processTime = slice.processTime;
|
|
78
|
+
mergedPayload.recordingTime = ((mergedPayload === null || mergedPayload === void 0 ? void 0 : mergedPayload.recordingTime) || 0) + slice.recordingTime;
|
|
79
|
+
});
|
|
80
|
+
return {
|
|
81
|
+
...payload,
|
|
82
|
+
...mergedPayload,
|
|
83
|
+
slices
|
|
84
|
+
};
|
|
85
|
+
};
|
|
41
86
|
return {
|
|
42
87
|
stop: () => RNWhisper.abortTranscribe(this.id, jobId),
|
|
43
88
|
subscribe: callback => {
|
|
@@ -48,10 +93,11 @@ class WhisperContext {
|
|
|
48
93
|
} = evt;
|
|
49
94
|
if (contextId !== this.id || evt.jobId !== jobId) return;
|
|
50
95
|
lastTranscribePayload = payload;
|
|
96
|
+
putSlice(payload);
|
|
51
97
|
callback({
|
|
52
98
|
contextId,
|
|
53
99
|
jobId: evt.jobId,
|
|
54
|
-
...payload
|
|
100
|
+
...mergeSlicesIfNeeded(payload)
|
|
55
101
|
});
|
|
56
102
|
});
|
|
57
103
|
const endListener = EventEmitter.addListener(EVENT_ON_REALTIME_TRANSCRIBE_END, evt => {
|
|
@@ -60,11 +106,15 @@ class WhisperContext {
|
|
|
60
106
|
payload
|
|
61
107
|
} = evt;
|
|
62
108
|
if (contextId !== this.id || evt.jobId !== jobId) return;
|
|
109
|
+
const lastPayload = {
|
|
110
|
+
...lastTranscribePayload,
|
|
111
|
+
...payload
|
|
112
|
+
};
|
|
113
|
+
putSlice(lastPayload);
|
|
63
114
|
callback({
|
|
64
115
|
contextId,
|
|
65
116
|
jobId: evt.jobId,
|
|
66
|
-
...
|
|
67
|
-
...payload,
|
|
117
|
+
...mergeSlicesIfNeeded(lastPayload),
|
|
68
118
|
isCapturing: false
|
|
69
119
|
});
|
|
70
120
|
transcribeListener.remove();
|
package/lib/module/index.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"names":["NativeEventEmitter","DeviceEventEmitter","NativeModules","Platform","LINKING_ERROR","select","ios","default","RNWhisper","Proxy","get","Error","EventEmitter","OS","EVENT_ON_REALTIME_TRANSCRIBE","EVENT_ON_REALTIME_TRANSCRIBE_END","WhisperContext","constructor","id","transcribe","path","options","arguments","length","undefined","jobId","Math","floor","random","stop","abortTranscribe","promise","transcribeFile","transcribeRealtime","startRealtimeTranscribe","lastTranscribePayload","subscribe","callback","transcribeListener","addListener","evt","contextId","
|
|
1
|
+
{"version":3,"names":["NativeEventEmitter","DeviceEventEmitter","NativeModules","Platform","LINKING_ERROR","select","ios","default","RNWhisper","Proxy","get","Error","EventEmitter","OS","EVENT_ON_REALTIME_TRANSCRIBE","EVENT_ON_REALTIME_TRANSCRIBE_END","WhisperContext","constructor","id","transcribe","path","options","arguments","length","undefined","jobId","Math","floor","random","stop","abortTranscribe","promise","transcribeFile","transcribeRealtime","startRealtimeTranscribe","lastTranscribePayload","slices","sliceIndex","tOffset","putSlice","payload","isUseSlices","_slices$sliceIndex","_segments","segments","data","t1","map","segment","t0","mergeSlicesIfNeeded","mergedPayload","forEach","slice","_mergedPayload$data","_slice$data","_mergedPayload$data2","_slice$data2","result","processTime","recordingTime","subscribe","callback","transcribeListener","addListener","evt","contextId","endListener","lastPayload","isCapturing","remove","release","releaseContext","initWhisper","filePath","initContext","releaseAllWhisper","releaseAllContexts"],"sourceRoot":"../../src","sources":["index.tsx"],"mappings":"AAAA,SACEA,kBAAkB,EAClBC,kBAAkB,EAClBC,aAAa,EACbC,QAAQ,QAEH,cAAc;AAErB,MAAMC,aAAa,GAChB,sEAAqED,QAAQ,CAACE,MAAM,CAAC;EAAEC,GAAG,EAAE,gCAAgC;EAAEC,OAAO,EAAE;AAAG,CAAC,CAC3I,oDAAmD;AAEtD,MAAMC,SAAS,GAAGN,aAAa,CAACM,SAAS,GACrCN,aAAa,CAACM,SAAS,GACvB,IAAIC,KAAK,CACT,CAAC,CAAC,EACF;EACEC,GAAGA,CAAA,EAAG;IACJ,MAAM,IAAIC,KAAK,CAACP,aAAa,CAAC;EAChC;AACF,CAAC,CACF;AAEH,IAAIQ,YAA2D;AAC/D,IAAIT,QAAQ,CAACU,EAAE,KAAK,KAAK,EAAE;EACzBD,YAAY,GAAG,IAAIZ,kBAAkB,CAACQ,SAAS,CAAC;AAClD;AACA,IAAIL,QAAQ,CAACU,EAAE,KAAK,SAAS,EAAE;EAC7BD,YAAY,GAAGX,kBAAkB;AACnC;AAEA,MAAMa,4BAA4B,GAAG,iCAAiC;AACtE,MAAMC,gCAAgC,GAAG,oCAAoC;AAiG7E,MAAMC,cAAc,CAAC;EAGnBC,WAAWA,CAACC,EAAU,EAAE;IACtB,IAAI,CAACA,EAAE,GAAGA,EAAE;EACd;;EAEA;EACAC,UAAUA,CAACC,IAAY,EAKrB;IAAA,IALuBC,OAA0B,GAAAC,SAAA,CAAAC,MAAA,QAAAD,SAAA,QAAAE,SAAA,GAAAF,SAAA,MAAG,CAAC,CAAC;IAMtD,MAAMG,KAAa,GAAGC,IAAI,CAACC,KAAK,CAACD,IAAI,CAACE,MAAM,EAAE,GA
AG,KAAK,CAAC;IACvD,OAAO;MACLC,IAAI,EAAEA,CAAA,KAAMrB,SAAS,CAACsB,eAAe,CAAC,IAAI,CAACZ,EAAE,EAAEO,KAAK,CAAC;MACrDM,OAAO,EAAEvB,SAAS,CAACwB,cAAc,CAAC,IAAI,CAACd,EAAE,EAAEO,KAAK,EAAEL,IAAI,EAAEC,OAAO;IACjE,CAAC;EACH;;EAEA;EACA,MAAMY,kBAAkBA,CAAA,EAKrB;IAAA,IALsBZ,OAAkC,GAAAC,SAAA,CAAAC,MAAA,QAAAD,SAAA,QAAAE,SAAA,GAAAF,SAAA,MAAG,CAAC,CAAC;IAM9D,MAAMG,KAAa,GAAGC,IAAI,CAACC,KAAK,CAACD,IAAI,CAACE,MAAM,EAAE,GAAG,KAAK,CAAC;IACvD,MAAMpB,SAAS,CAAC0B,uBAAuB,CAAC,IAAI,CAAChB,EAAE,EAAEO,KAAK,EAAEJ,OAAO,CAAC;IAChE,IAAIc,qBAAsD;IAE1D,MAAMC,MAAyC,GAAG,EAAE;IACpD,IAAIC,UAAkB,GAAG,CAAC;IAC1B,IAAIC,OAAe,GAAG,CAAC;IAEvB,MAAMC,QAAQ,GAAIC,OAAwC,IAAK;MAC7D,IAAI,CAACA,OAAO,CAACC,WAAW,EAAE;MAC1B,IAAIJ,UAAU,KAAKG,OAAO,CAACH,UAAU,EAAE;QAAA,IAAAK,kBAAA,EAAAC,SAAA;QACrC,MAAM;UAAEC,QAAQ,GAAG;QAAG,CAAC,GAAG,EAAAF,kBAAA,GAAAN,MAAM,CAACC,UAAU,CAAC,cAAAK,kBAAA,uBAAlBA,kBAAA,CAAoBG,IAAI,KAAI,CAAC,CAAC;QACxDP,OAAO,GAAG,EAAAK,SAAA,GAAAC,QAAQ,CAACA,QAAQ,CAACrB,MAAM,GAAG,CAAC,CAAC,cAAAoB,SAAA,uBAA7BA,SAAA,CAA+BG,EAAE,KAAI,CAAC;MAClD;MACA,CAAC;QAAET;MAAW,CAAC,GAAGG,OAAO;MACzBJ,MAAM,CAACC,UAAU,CAAC,GAAG;QACnB,GAAGG,OAAO;QACVK,IAAI,EAAEL,OAAO,CAACK,IAAI,GAAG;UACnB,GAAGL,OAAO,CAACK,IAAI;UACfD,QAAQ,EAAEJ,OAAO,CAACK,IAAI,CAACD,QAAQ,CAACG,GAAG,CAAEC,OAAO,KAAM;YAChD,GAAGA,OAAO;YACVC,EAAE,EAAED,OAAO,CAACC,EAAE,GAAGX,OAAO;YACxBQ,EAAE,EAAEE,OAAO,CAACF,EAAE,GAAGR;UACnB,CAAC,CAAC,CAAC,IAAI;QACT,CAAC,GAAGd;MACN,CAAC;IACH,CAAC;IAED,MAAM0B,mBAAmB,GAAIV,OAAwC,IAAsC;MACzG,IAAI,CAACA,OAAO,CAACC,WAAW,EAAE,OAAOD,OAAO;MAExC,MAAMW,aAAkB,GAAG,CAAC,CAAC;MAC7Bf,MAAM,CAACgB,OAAO,CACXC,KAAK,IAAK;QAAA,IAAAC,mBAAA,EAAAC,WAAA,EAAAC,oBAAA,EAAAC,YAAA;QACTN,aAAa,CAACN,IAAI,GAAG;UACnBa,MAAM,EAAE,CAAC,EAAAJ,mBAAA,GAAAH,aAAa,CAACN,IAAI,cAAAS,mBAAA,uBAAlBA,mBAAA,CAAoBI,MAAM,KAAI,EAAE,KAAK,EAAAH,WAAA,GAAAF,KAAK,CAACR,IAAI,cAAAU,WAAA,uBAAVA,WAAA,CAAYG,MAAM,KAAI,EAAE,CAAC;UACvEd,QAAQ,EAAE,CACR,IAAI,CAAAO,aAAa,aAAbA,aAAa,wBAAAK,oBAAA,GAAbL,aAAa,CAAEN,IAAI,cAAAW,oBAAA,uBAAnBA,oBAAA,CAAqBZ,QAAQ,KAAI,EAAE,CAAC,EACxC,IAAI,EAAAa,YAAA,GAAAJ,KAAK,CAACR,
IAAI,cAAAY,YAAA,uBAAVA,YAAA,CAAYb,QAAQ,KAAI,EAAE,CAAC;QAEnC,CAAC;QACDO,aAAa,CAACQ,WAAW,GAAGN,KAAK,CAACM,WAAW;QAC7CR,aAAa,CAACS,aAAa,GAAG,CAAC,CAAAT,aAAa,aAAbA,aAAa,uBAAbA,aAAa,CAAES,aAAa,KAAI,CAAC,IAAIP,KAAK,CAACO,aAAa;MACzF,CAAC,CACF;MACD,OAAO;QAAE,GAAGpB,OAAO;QAAE,GAAGW,aAAa;QAAEf;MAAO,CAAC;IACjD,CAAC;IAED,OAAO;MACLP,IAAI,EAAEA,CAAA,KAAMrB,SAAS,CAACsB,eAAe,CAAC,IAAI,CAACZ,EAAE,EAAEO,KAAK,CAAC;MACrDoC,SAAS,EAAGC,QAAkD,IAAK;QACjE,MAAMC,kBAAkB,GAAGnD,YAAY,CAACoD,WAAW,CACjDlD,4BAA4B,EAC3BmD,GAAkC,IAAK;UACtC,MAAM;YAAEC,SAAS;YAAE1B;UAAQ,CAAC,GAAGyB,GAAG;UAClC,IAAIC,SAAS,KAAK,IAAI,CAAChD,EAAE,IAAI+C,GAAG,CAACxC,KAAK,KAAKA,KAAK,EAAE;UAClDU,qBAAqB,GAAGK,OAAO;UAC/BD,QAAQ,CAACC,OAAO,CAAC;UACjBsB,QAAQ,CAAC;YACPI,SAAS;YACTzC,KAAK,EAAEwC,GAAG,CAACxC,KAAK;YAChB,GAAGyB,mBAAmB,CAACV,OAAO;UAChC,CAAC,CAAC;QACJ,CAAC,CACF;QACD,MAAM2B,WAAW,GAAGvD,YAAY,CAACoD,WAAW,CAC1CjD,gCAAgC,EAC/BkD,GAAkC,IAAK;UACtC,MAAM;YAAEC,SAAS;YAAE1B;UAAQ,CAAC,GAAGyB,GAAG;UAClC,IAAIC,SAAS,KAAK,IAAI,CAAChD,EAAE,IAAI+C,GAAG,CAACxC,KAAK,KAAKA,KAAK,EAAE;UAClD,MAAM2C,WAAW,GAAG;YAClB,GAAGjC,qBAAqB;YACxB,GAAGK;UACL,CAAC;UACDD,QAAQ,CAAC6B,WAAW,CAAC;UACrBN,QAAQ,CAAC;YACPI,SAAS;YACTzC,KAAK,EAAEwC,GAAG,CAACxC,KAAK;YAChB,GAAGyB,mBAAmB,CAACkB,WAAW,CAAC;YACnCC,WAAW,EAAE;UACf,CAAC,CAAC;UACFN,kBAAkB,CAACO,MAAM,EAAE;UAC3BH,WAAW,CAACG,MAAM,EAAE;QACtB,CAAC,CACF;MACH;IACF,CAAC;EACH;EAEA,MAAMC,OAAOA,CAAA,EAAG;IACd,OAAO/D,SAAS,CAACgE,cAAc,CAAC,IAAI,CAACtD,EAAE,CAAC;EAC1C;AACF;AAEA,OAAO,eAAeuD,WAAWA,CAAA,EAEN;EAAA,IADzB;IAAEC;EAAgC,CAAC,GAAApD,SAAA,CAAAC,MAAA,QAAAD,SAAA,QAAAE,SAAA,GAAAF,SAAA,MAAG,CAAC,CAAC;EAExC,MAAMJ,EAAE,GAAG,MAAMV,SAAS,CAACmE,WAAW,CAACD,QAAQ,CAAC;EAChD,OAAO,IAAI1D,cAAc,CAACE,EAAE,CAAC;AAC/B;AAEA,OAAO,eAAe0D,iBAAiBA,CAAA,EAAkB;EACvD,OAAOpE,SAAS,CAACqE,kBAAkB,EAAE;AACvC"}
|
|
@@ -36,6 +36,12 @@ export type TranscribeRealtimeOptions = TranscribeOptions & {
|
|
|
36
36
|
* the recommended value will be <= 30 seconds. (Default: 30)
|
|
37
37
|
*/
|
|
38
38
|
realtimeAudioSec?: number;
|
|
39
|
+
/**
|
|
40
|
+
* Optimize audio transcription performance by slicing audio samples when `realtimeAudioSec` > 30.
|
|
41
|
+
* Set `realtimeAudioSliceSec` < 30 so that each slice stays within Whisper's hard constraint (it processes audio in chunks of 30 seconds) and the performance improvement can be achieved.
|
|
42
|
+
* (Default: Equal to `realtimeAudioSec`)
|
|
43
|
+
*/
|
|
44
|
+
realtimeAudioSliceSec?: number;
|
|
39
45
|
};
|
|
40
46
|
export type TranscribeResult = {
|
|
41
47
|
result: string;
|
|
@@ -52,24 +58,34 @@ export type TranscribeRealtimeEvent = {
|
|
|
52
58
|
isCapturing: boolean;
|
|
53
59
|
isStoppedByAction?: boolean;
|
|
54
60
|
code: number;
|
|
61
|
+
data?: TranscribeResult;
|
|
62
|
+
error?: string;
|
|
55
63
|
processTime: number;
|
|
56
64
|
recordingTime: number;
|
|
65
|
+
slices?: Array<{
|
|
66
|
+
code: number;
|
|
67
|
+
error?: string;
|
|
68
|
+
data?: TranscribeResult;
|
|
69
|
+
processTime: number;
|
|
70
|
+
recordingTime: number;
|
|
71
|
+
}>;
|
|
72
|
+
};
|
|
73
|
+
export type TranscribeRealtimeNativePayload = {
|
|
74
|
+
/** Is capturing audio, when false, the event is the final result */
|
|
75
|
+
isCapturing: boolean;
|
|
76
|
+
isStoppedByAction?: boolean;
|
|
77
|
+
code: number;
|
|
78
|
+
processTime: number;
|
|
79
|
+
recordingTime: number;
|
|
80
|
+
isUseSlices: boolean;
|
|
81
|
+
sliceIndex: number;
|
|
57
82
|
data?: TranscribeResult;
|
|
58
83
|
error?: string;
|
|
59
84
|
};
|
|
60
85
|
export type TranscribeRealtimeNativeEvent = {
|
|
61
86
|
contextId: number;
|
|
62
87
|
jobId: number;
|
|
63
|
-
payload:
|
|
64
|
-
/** Is capturing audio, when false, the event is the final result */
|
|
65
|
-
isCapturing: boolean;
|
|
66
|
-
isStoppedByAction?: boolean;
|
|
67
|
-
code: number;
|
|
68
|
-
processTime: number;
|
|
69
|
-
recordingTime: number;
|
|
70
|
-
data?: TranscribeResult;
|
|
71
|
-
error?: string;
|
|
72
|
-
};
|
|
88
|
+
payload: TranscribeRealtimeNativePayload;
|
|
73
89
|
};
|
|
74
90
|
declare class WhisperContext {
|
|
75
91
|
id: number;
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/index.tsx"],"names":[],"mappings":"AAkCA,MAAM,MAAM,iBAAiB,GAAG;IAC9B,wDAAwD;IACxD,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,iEAAiE;IACjE,SAAS,CAAC,EAAE,OAAO,CAAC;IACpB,+DAA+D;IAC/D,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,qDAAqD;IACrD,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,2CAA2C;IAC3C,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,oCAAoC;IACpC,eAAe,CAAC,EAAE,OAAO,CAAC;IAC1B,2CAA2C;IAC3C,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,kCAAkC;IAClC,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,mDAAmD;IACnD,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,mCAAmC;IACnC,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,cAAc,CAAC,EAAE,MAAM,CAAC;IACxB,gCAAgC;IAChC,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,wCAAwC;IACxC,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,8CAA8C;IAC9C,OAAO,CAAC,EAAE,OAAO,CAAC;IAClB,qBAAqB;IACrB,MAAM,CAAC,EAAE,MAAM,CAAC;CACjB,CAAA;AAED,MAAM,MAAM,yBAAyB,GAAG,iBAAiB,GAAG;IAC1D;;;;OAIG;IACH,gBAAgB,CAAC,EAAE,MAAM,CAAC;
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/index.tsx"],"names":[],"mappings":"AAkCA,MAAM,MAAM,iBAAiB,GAAG;IAC9B,wDAAwD;IACxD,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,iEAAiE;IACjE,SAAS,CAAC,EAAE,OAAO,CAAC;IACpB,+DAA+D;IAC/D,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,qDAAqD;IACrD,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,2CAA2C;IAC3C,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,oCAAoC;IACpC,eAAe,CAAC,EAAE,OAAO,CAAC;IAC1B,2CAA2C;IAC3C,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,kCAAkC;IAClC,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,mDAAmD;IACnD,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,mCAAmC;IACnC,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,cAAc,CAAC,EAAE,MAAM,CAAC;IACxB,gCAAgC;IAChC,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,wCAAwC;IACxC,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,8CAA8C;IAC9C,OAAO,CAAC,EAAE,OAAO,CAAC;IAClB,qBAAqB;IACrB,MAAM,CAAC,EAAE,MAAM,CAAC;CACjB,CAAA;AAED,MAAM,MAAM,yBAAyB,GAAG,iBAAiB,GAAG;IAC1D;;;;OAIG;IACH,gBAAgB,CAAC,EAAE,MAAM,CAAC;IAC1B;;;;OAIG;IACH,qBAAqB,CAAC,EAAE,MAAM,CAAA;CAC/B,CAAA;AAED,MAAM,MAAM,gBAAgB,GAAG;IAC7B,MAAM,EAAE,MAAM,CAAC;IACf,QAAQ,EAAE,KAAK,CAAC;QACd,IAAI,EAAE,MAAM,CAAC;QACb,EAAE,EAAE,MAAM,CAAC;QACX,EAAE,EAAE,MAAM,CAAC;KACZ,CAAC,CAAC;CACJ,CAAA;AAED,MAAM,MAAM,uBAAuB,GAAG;IACpC,SAAS,EAAE,MAAM,CAAC;IAClB,KAAK,EAAE,MAAM,CAAC;IACd,oEAAoE;IACpE,WAAW,EAAE,OAAO,CAAC;IACrB,iBAAiB,CAAC,EAAE,OAAO,CAAC;IAC5B,IAAI,EAAE,MAAM,CAAC;IACb,IAAI,CAAC,EAAE,gBAAgB,CAAC;IACxB,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,WAAW,EAAE,MAAM,CAAC;IACpB,aAAa,EAAE,MAAM,CAAC;IACtB,MAAM,CAAC,EAAE,KAAK,CAAC;QACb,IAAI,EAAE,MAAM,CAAC;QACb,KAAK,CAAC,EAAE,MAAM,CAAC;QACf,IAAI,CAAC,EAAE,gBAAgB,CAAC;QACxB,WAAW,EAAE,MAAM,CAAC;QACpB,aAAa,EAAE,MAAM,CAAC;KACvB,CAAC,CAAC;CACJ,CAAA;AAED,MAAM,MAAM,+BAA+B,GAAG;IAC5C,oEAAoE;IACpE,WAAW,EAAE,OAAO,CAAC;IACrB,iBAAiB,CAAC,EAAE,OAAO,CAAC;IAC5B,IAAI,EAAE,MAAM,CAAC;IACb,WAAW,EAAE,MAAM,CAAC;IACpB,aAAa,EAAE,MAAM,CAAC;IACtB,WAAW,EAAE,OAAO,CAAC;IACrB,UAAU,EAAE,MAAM,CAAC;IACnB,IAAI,CAAC,EAAE,gBAAgB,CAAC;IACxB,KAAK,CAAC,EAAE,MAAM,CAAC;CAChB,CAAA;AAED,MAAM,MAAM,6BAA6B,GAAG;IAC1C,SAAS,EAAE,MAAM,CAAC;IAClB,KAAK,EAAE,MAAM,CAAC;IACd,OAAO,EAAE,+BAA+
B,CAAC;CAC1C,CAAA;AAED,cAAM,cAAc;IAClB,EAAE,EAAE,MAAM,CAAA;gBAEE,EAAE,EAAE,MAAM;IAItB,4BAA4B;IAC5B,UAAU,CAAC,IAAI,EAAE,MAAM,EAAE,OAAO,GAAE,iBAAsB,GAAG;QACzD,0BAA0B;QAC1B,IAAI,EAAE,MAAM,IAAI,CAAC;QACjB,gCAAgC;QAChC,OAAO,EAAE,OAAO,CAAC,gBAAgB,CAAC,CAAC;KACpC;IAQD,yFAAyF;IACnF,kBAAkB,CAAC,OAAO,GAAE,yBAA8B,GAAG,OAAO,CAAC;QACzE,mCAAmC;QACnC,IAAI,EAAE,MAAM,IAAI,CAAC;QACjB,8CAA8C;QAC9C,SAAS,EAAE,CAAC,QAAQ,EAAE,CAAC,KAAK,EAAE,uBAAuB,KAAK,IAAI,KAAK,IAAI,CAAC;KACzE,CAAC;IA0FI,OAAO;CAGd;AAED,wBAAsB,WAAW,CAC/B,EAAE,QAAQ,EAAE,GAAE;IAAE,QAAQ,CAAC,EAAE,MAAM,CAAA;CAAO,GACvC,OAAO,CAAC,cAAc,CAAC,CAGzB;AAED,wBAAsB,iBAAiB,IAAI,OAAO,CAAC,IAAI,CAAC,CAEvD"}
|
package/package.json
CHANGED
package/src/index.tsx
CHANGED
|
@@ -71,6 +71,12 @@ export type TranscribeRealtimeOptions = TranscribeOptions & {
|
|
|
71
71
|
* the recommended value will be <= 30 seconds. (Default: 30)
|
|
72
72
|
*/
|
|
73
73
|
realtimeAudioSec?: number,
|
|
74
|
+
/**
|
|
75
|
+
* Optimize audio transcription performance by slicing audio samples when `realtimeAudioSec` > 30.
|
|
76
|
+
* Set `realtimeAudioSliceSec` < 30 so that each slice stays within Whisper's hard constraint (it processes audio in chunks of 30 seconds) and the performance improvement can be achieved.
|
|
77
|
+
* (Default: Equal to `realtimeAudioSec`)
|
|
78
|
+
*/
|
|
79
|
+
realtimeAudioSliceSec?: number
|
|
74
80
|
}
|
|
75
81
|
|
|
76
82
|
export type TranscribeResult = {
|
|
@@ -85,12 +91,32 @@ export type TranscribeResult = {
|
|
|
85
91
|
export type TranscribeRealtimeEvent = {
|
|
86
92
|
contextId: number,
|
|
87
93
|
jobId: number,
|
|
94
|
+
/** Is capturing audio, when false, the event is the final result */
|
|
95
|
+
isCapturing: boolean,
|
|
96
|
+
isStoppedByAction?: boolean,
|
|
97
|
+
code: number,
|
|
98
|
+
data?: TranscribeResult,
|
|
99
|
+
error?: string,
|
|
100
|
+
processTime: number,
|
|
101
|
+
recordingTime: number,
|
|
102
|
+
slices?: Array<{
|
|
103
|
+
code: number,
|
|
104
|
+
error?: string,
|
|
105
|
+
data?: TranscribeResult,
|
|
106
|
+
processTime: number,
|
|
107
|
+
recordingTime: number,
|
|
108
|
+
}>,
|
|
109
|
+
}
|
|
110
|
+
|
|
111
|
+
export type TranscribeRealtimeNativePayload = {
|
|
88
112
|
/** Is capturing audio, when false, the event is the final result */
|
|
89
113
|
isCapturing: boolean,
|
|
90
114
|
isStoppedByAction?: boolean,
|
|
91
115
|
code: number,
|
|
92
116
|
processTime: number,
|
|
93
117
|
recordingTime: number,
|
|
118
|
+
isUseSlices: boolean,
|
|
119
|
+
sliceIndex: number,
|
|
94
120
|
data?: TranscribeResult,
|
|
95
121
|
error?: string,
|
|
96
122
|
}
|
|
@@ -98,16 +124,7 @@ export type TranscribeRealtimeEvent = {
|
|
|
98
124
|
export type TranscribeRealtimeNativeEvent = {
|
|
99
125
|
contextId: number,
|
|
100
126
|
jobId: number,
|
|
101
|
-
payload:
|
|
102
|
-
/** Is capturing audio, when false, the event is the final result */
|
|
103
|
-
isCapturing: boolean,
|
|
104
|
-
isStoppedByAction?: boolean,
|
|
105
|
-
code: number,
|
|
106
|
-
processTime: number,
|
|
107
|
-
recordingTime: number,
|
|
108
|
-
data?: TranscribeResult,
|
|
109
|
-
error?: string,
|
|
110
|
-
},
|
|
127
|
+
payload: TranscribeRealtimeNativePayload,
|
|
111
128
|
}
|
|
112
129
|
|
|
113
130
|
class WhisperContext {
|
|
@@ -140,7 +157,52 @@ class WhisperContext {
|
|
|
140
157
|
}> {
|
|
141
158
|
const jobId: number = Math.floor(Math.random() * 10000)
|
|
142
159
|
await RNWhisper.startRealtimeTranscribe(this.id, jobId, options)
|
|
143
|
-
let lastTranscribePayload:
|
|
160
|
+
let lastTranscribePayload: TranscribeRealtimeNativePayload
|
|
161
|
+
|
|
162
|
+
const slices: TranscribeRealtimeNativePayload[] = []
|
|
163
|
+
let sliceIndex: number = 0
|
|
164
|
+
let tOffset: number = 0
|
|
165
|
+
|
|
166
|
+
const putSlice = (payload: TranscribeRealtimeNativePayload) => {
|
|
167
|
+
if (!payload.isUseSlices) return
|
|
168
|
+
if (sliceIndex !== payload.sliceIndex) {
|
|
169
|
+
const { segments = [] } = slices[sliceIndex]?.data || {}
|
|
170
|
+
tOffset = segments[segments.length - 1]?.t1 || 0
|
|
171
|
+
}
|
|
172
|
+
({ sliceIndex } = payload)
|
|
173
|
+
slices[sliceIndex] = {
|
|
174
|
+
...payload,
|
|
175
|
+
data: payload.data ? {
|
|
176
|
+
...payload.data,
|
|
177
|
+
segments: payload.data.segments.map((segment) => ({
|
|
178
|
+
...segment,
|
|
179
|
+
t0: segment.t0 + tOffset,
|
|
180
|
+
t1: segment.t1 + tOffset,
|
|
181
|
+
})) || [],
|
|
182
|
+
} : undefined,
|
|
183
|
+
}
|
|
184
|
+
}
|
|
185
|
+
|
|
186
|
+
const mergeSlicesIfNeeded = (payload: TranscribeRealtimeNativePayload): TranscribeRealtimeNativePayload => {
|
|
187
|
+
if (!payload.isUseSlices) return payload
|
|
188
|
+
|
|
189
|
+
const mergedPayload: any = {}
|
|
190
|
+
slices.forEach(
|
|
191
|
+
(slice) => {
|
|
192
|
+
mergedPayload.data = {
|
|
193
|
+
result: (mergedPayload.data?.result || '') + (slice.data?.result || ''),
|
|
194
|
+
segments: [
|
|
195
|
+
...(mergedPayload?.data?.segments || []),
|
|
196
|
+
...(slice.data?.segments || []),
|
|
197
|
+
],
|
|
198
|
+
}
|
|
199
|
+
mergedPayload.processTime = slice.processTime
|
|
200
|
+
mergedPayload.recordingTime = (mergedPayload?.recordingTime || 0) + slice.recordingTime
|
|
201
|
+
}
|
|
202
|
+
)
|
|
203
|
+
return { ...payload, ...mergedPayload, slices }
|
|
204
|
+
}
|
|
205
|
+
|
|
144
206
|
return {
|
|
145
207
|
stop: () => RNWhisper.abortTranscribe(this.id, jobId),
|
|
146
208
|
subscribe: (callback: (event: TranscribeRealtimeEvent) => void) => {
|
|
@@ -150,7 +212,12 @@ class WhisperContext {
|
|
|
150
212
|
const { contextId, payload } = evt
|
|
151
213
|
if (contextId !== this.id || evt.jobId !== jobId) return
|
|
152
214
|
lastTranscribePayload = payload
|
|
153
|
-
|
|
215
|
+
putSlice(payload)
|
|
216
|
+
callback({
|
|
217
|
+
contextId,
|
|
218
|
+
jobId: evt.jobId,
|
|
219
|
+
...mergeSlicesIfNeeded(payload),
|
|
220
|
+
})
|
|
154
221
|
}
|
|
155
222
|
)
|
|
156
223
|
const endListener = EventEmitter.addListener(
|
|
@@ -158,11 +225,15 @@ class WhisperContext {
|
|
|
158
225
|
(evt: TranscribeRealtimeNativeEvent) => {
|
|
159
226
|
const { contextId, payload } = evt
|
|
160
227
|
if (contextId !== this.id || evt.jobId !== jobId) return
|
|
228
|
+
const lastPayload = {
|
|
229
|
+
...lastTranscribePayload,
|
|
230
|
+
...payload,
|
|
231
|
+
}
|
|
232
|
+
putSlice(lastPayload)
|
|
161
233
|
callback({
|
|
162
234
|
contextId,
|
|
163
235
|
jobId: evt.jobId,
|
|
164
|
-
...
|
|
165
|
-
...payload,
|
|
236
|
+
...mergeSlicesIfNeeded(lastPayload),
|
|
166
237
|
isCapturing: false
|
|
167
238
|
})
|
|
168
239
|
transcribeListener.remove()
|