whisper.rn 0.2.0 → 0.2.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +1 -1
- package/android/src/main/java/com/rnwhisper/WhisperContext.java +155 -55
- package/android/src/main/jni/whisper/jni.cpp +3 -3
- package/ios/RNWhisperContext.h +8 -3
- package/ios/RNWhisperContext.mm +150 -47
- package/jest/mock.js +6 -1
- package/lib/commonjs/index.js +57 -11
- package/lib/commonjs/index.js.map +1 -1
- package/lib/module/index.js +57 -11
- package/lib/module/index.js.map +1 -1
- package/lib/typescript/index.d.ts +27 -9
- package/lib/typescript/index.d.ts.map +1 -1
- package/package.json +1 -1
- package/src/index.tsx +94 -20
package/README.md
CHANGED
|
@@ -57,7 +57,7 @@ const { result } = await promise
|
|
|
57
57
|
Use realtime transcribe:
|
|
58
58
|
|
|
59
59
|
```js
|
|
60
|
-
const { stop, subscribe } = whisperContext.transcribeRealtime(options)
|
|
60
|
+
const { stop, subscribe } = await whisperContext.transcribeRealtime(options)
|
|
61
61
|
|
|
62
62
|
subscribe(evt => {
|
|
63
63
|
const { isCapturing, data, processTime, recordingTime } = evt
|
|
@@ -15,6 +15,7 @@ import android.media.AudioRecord;
|
|
|
15
15
|
import android.media.MediaRecorder.AudioSource;
|
|
16
16
|
|
|
17
17
|
import java.util.Random;
|
|
18
|
+
import java.util.ArrayList;
|
|
18
19
|
import java.lang.StringBuilder;
|
|
19
20
|
import java.io.File;
|
|
20
21
|
import java.io.BufferedReader;
|
|
@@ -41,17 +42,25 @@ public class WhisperContext {
|
|
|
41
42
|
private int id;
|
|
42
43
|
private ReactApplicationContext reactContext;
|
|
43
44
|
private long context;
|
|
44
|
-
|
|
45
|
-
private DeviceEventManagerModule.RCTDeviceEventEmitter eventEmitter;
|
|
46
|
-
|
|
47
45
|
private int jobId = -1;
|
|
46
|
+
private DeviceEventManagerModule.RCTDeviceEventEmitter eventEmitter;
|
|
47
|
+
|
|
48
48
|
private AudioRecord recorder = null;
|
|
49
49
|
private int bufferSize;
|
|
50
|
-
private
|
|
51
|
-
private
|
|
50
|
+
private int nSamplesTranscribing = 0;
|
|
51
|
+
private ArrayList<short[]> shortBufferSlices;
|
|
52
|
+
// Remember number of samples in each slice
|
|
53
|
+
private ArrayList<Integer> sliceNSamples;
|
|
54
|
+
// Current buffer slice index
|
|
55
|
+
private int sliceIndex = 0;
|
|
56
|
+
// Current transcribing slice index
|
|
57
|
+
private int transcribeSliceIndex = 0;
|
|
58
|
+
private boolean isUseSlices = false;
|
|
59
|
+
private boolean isRealtime = false;
|
|
52
60
|
private boolean isCapturing = false;
|
|
61
|
+
private boolean isStoppedByAction = false;
|
|
53
62
|
private boolean isTranscribing = false;
|
|
54
|
-
private
|
|
63
|
+
private Thread fullHandler = null;
|
|
55
64
|
|
|
56
65
|
public WhisperContext(int id, ReactApplicationContext reactContext, long context) {
|
|
57
66
|
this.id = id;
|
|
@@ -61,6 +70,19 @@ public class WhisperContext {
|
|
|
61
70
|
bufferSize = AudioRecord.getMinBufferSize(SAMPLE_RATE, CHANNEL_CONFIG, AUDIO_FORMAT);
|
|
62
71
|
}
|
|
63
72
|
|
|
73
|
+
private void resetRealtimeTranscribe() {
|
|
74
|
+
shortBufferSlices = null;
|
|
75
|
+
sliceNSamples = null;
|
|
76
|
+
sliceIndex = 0;
|
|
77
|
+
transcribeSliceIndex = 0;
|
|
78
|
+
isUseSlices = false;
|
|
79
|
+
isRealtime = false;
|
|
80
|
+
isCapturing = false;
|
|
81
|
+
isStoppedByAction = false;
|
|
82
|
+
isTranscribing = false;
|
|
83
|
+
fullHandler = null;
|
|
84
|
+
}
|
|
85
|
+
|
|
64
86
|
public int startRealtimeTranscribe(int jobId, ReadableMap options) {
|
|
65
87
|
if (isCapturing || isTranscribing) {
|
|
66
88
|
return -100;
|
|
@@ -73,17 +95,25 @@ public class WhisperContext {
|
|
|
73
95
|
recorder.release();
|
|
74
96
|
return state;
|
|
75
97
|
}
|
|
76
|
-
|
|
77
|
-
int realtimeAudioSec = options.hasKey("realtimeAudioSec") ? options.getInt("realtimeAudioSec") : 0;
|
|
78
|
-
final int maxAudioSec = realtimeAudioSec > 0 ? realtimeAudioSec : DEFAULT_MAX_AUDIO_SEC;
|
|
79
98
|
|
|
80
|
-
|
|
99
|
+
resetRealtimeTranscribe();
|
|
81
100
|
|
|
82
101
|
this.jobId = jobId;
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
102
|
+
|
|
103
|
+
int realtimeAudioSec = options.hasKey("realtimeAudioSec") ? options.getInt("realtimeAudioSec") : 0;
|
|
104
|
+
final int audioSec = realtimeAudioSec > 0 ? realtimeAudioSec : DEFAULT_MAX_AUDIO_SEC;
|
|
105
|
+
|
|
106
|
+
int realtimeAudioSliceSec = options.hasKey("realtimeAudioSliceSec") ? options.getInt("realtimeAudioSliceSec") : 0;
|
|
107
|
+
final int audioSliceSec = realtimeAudioSliceSec > 0 && realtimeAudioSliceSec < audioSec ? realtimeAudioSliceSec : audioSec;
|
|
108
|
+
|
|
109
|
+
isUseSlices = audioSliceSec < audioSec;
|
|
110
|
+
|
|
111
|
+
shortBufferSlices = new ArrayList<short[]>();
|
|
112
|
+
shortBufferSlices.add(new short[audioSliceSec * SAMPLE_RATE]);
|
|
113
|
+
sliceNSamples = new ArrayList<Integer>();
|
|
114
|
+
sliceNSamples.add(0);
|
|
86
115
|
|
|
116
|
+
isCapturing = true;
|
|
87
117
|
recorder.startRecording();
|
|
88
118
|
|
|
89
119
|
new Thread(new Runnable() {
|
|
@@ -91,60 +121,57 @@ public class WhisperContext {
|
|
|
91
121
|
public void run() {
|
|
92
122
|
try {
|
|
93
123
|
short[] buffer = new short[bufferSize];
|
|
94
|
-
Thread fullHandler = null;
|
|
95
124
|
while (isCapturing) {
|
|
96
125
|
try {
|
|
97
126
|
int n = recorder.read(buffer, 0, bufferSize);
|
|
98
127
|
if (n == 0) continue;
|
|
99
128
|
|
|
100
|
-
|
|
101
|
-
|
|
129
|
+
int totalNSamples = 0;
|
|
130
|
+
for (int i = 0; i < sliceNSamples.size(); i++) {
|
|
131
|
+
totalNSamples += sliceNSamples.get(i);
|
|
132
|
+
}
|
|
133
|
+
|
|
134
|
+
int nSamples = sliceNSamples.get(sliceIndex);
|
|
135
|
+
if (totalNSamples + n > audioSec * SAMPLE_RATE) {
|
|
136
|
+
// Full, stop capturing
|
|
102
137
|
isCapturing = false;
|
|
103
|
-
if (
|
|
138
|
+
if (
|
|
139
|
+
!isTranscribing &&
|
|
140
|
+
nSamples == nSamplesTranscribing &&
|
|
141
|
+
sliceIndex == transcribeSliceIndex
|
|
142
|
+
) {
|
|
104
143
|
emitTranscribeEvent("@RNWhisper_onRealtimeTranscribeEnd", Arguments.createMap());
|
|
144
|
+
} else if (!isTranscribing) {
|
|
145
|
+
isTranscribing = true;
|
|
146
|
+
fullTranscribeSamples(options, true);
|
|
147
|
+
}
|
|
105
148
|
break;
|
|
106
149
|
}
|
|
107
|
-
|
|
150
|
+
|
|
151
|
+
// Append to buffer
|
|
152
|
+
short[] shortBuffer = shortBufferSlices.get(sliceIndex);
|
|
153
|
+
if (nSamples + n > audioSliceSec * SAMPLE_RATE) {
|
|
154
|
+
Log.d(NAME, "next slice");
|
|
155
|
+
|
|
156
|
+
sliceIndex++;
|
|
157
|
+
nSamples = 0;
|
|
158
|
+
shortBuffer = new short[audioSliceSec * SAMPLE_RATE];
|
|
159
|
+
shortBufferSlices.add(shortBuffer);
|
|
160
|
+
sliceNSamples.add(0);
|
|
161
|
+
}
|
|
162
|
+
|
|
108
163
|
for (int i = 0; i < n; i++) {
|
|
109
|
-
|
|
164
|
+
shortBuffer[nSamples + i] = buffer[i];
|
|
110
165
|
}
|
|
166
|
+
nSamples += n;
|
|
167
|
+
sliceNSamples.set(sliceIndex, nSamples);
|
|
168
|
+
|
|
111
169
|
if (!isTranscribing && nSamples > SAMPLE_RATE / 2) {
|
|
112
170
|
isTranscribing = true;
|
|
113
|
-
Log.d(NAME, "Start transcribing realtime: " + nSamples);
|
|
114
171
|
fullHandler = new Thread(new Runnable() {
|
|
115
172
|
@Override
|
|
116
173
|
public void run() {
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
// convert I16 to F32
|
|
120
|
-
float[] nSamplesBuffer32 = new float[nSamples];
|
|
121
|
-
for (int i = 0; i < nSamples; i++) {
|
|
122
|
-
nSamplesBuffer32[i] = buffer16[i] / 32768.0f;
|
|
123
|
-
}
|
|
124
|
-
|
|
125
|
-
int timeStart = (int) System.currentTimeMillis();
|
|
126
|
-
int code = full(jobId, options, nSamplesBuffer32, nSamples);
|
|
127
|
-
int timeEnd = (int) System.currentTimeMillis();
|
|
128
|
-
int timeRecording = (int) (nSamples / SAMPLE_RATE * 1000);
|
|
129
|
-
|
|
130
|
-
WritableMap payload = Arguments.createMap();
|
|
131
|
-
payload.putBoolean("isCapturing", isCapturing);
|
|
132
|
-
payload.putInt("code", code);
|
|
133
|
-
payload.putInt("processTime", timeEnd - timeStart);
|
|
134
|
-
payload.putInt("recordingTime", timeRecording);
|
|
135
|
-
|
|
136
|
-
if (code == 0) {
|
|
137
|
-
payload.putMap("data", getTextSegments());
|
|
138
|
-
emitTranscribeEvent("@RNWhisper_onRealtimeTranscribe", payload);
|
|
139
|
-
} else {
|
|
140
|
-
payload.putString("error", "Transcribe failed with code " + code);
|
|
141
|
-
emitTranscribeEvent("@RNWhisper_onRealtimeTranscribe", payload);
|
|
142
|
-
}
|
|
143
|
-
|
|
144
|
-
if (!isCapturing) {
|
|
145
|
-
emitTranscribeEvent("@RNWhisper_onRealtimeTranscribeEnd", Arguments.createMap());
|
|
146
|
-
}
|
|
147
|
-
isTranscribing = false;
|
|
174
|
+
fullTranscribeSamples(options, false);
|
|
148
175
|
}
|
|
149
176
|
});
|
|
150
177
|
fullHandler.start();
|
|
@@ -153,9 +180,14 @@ public class WhisperContext {
|
|
|
153
180
|
Log.e(NAME, "Error transcribing realtime: " + e.getMessage());
|
|
154
181
|
}
|
|
155
182
|
}
|
|
183
|
+
if (!isTranscribing) {
|
|
184
|
+
emitTranscribeEvent("@RNWhisper_onRealtimeTranscribeEnd", Arguments.createMap());
|
|
185
|
+
}
|
|
156
186
|
if (fullHandler != null) {
|
|
157
187
|
fullHandler.join(); // Wait for full transcribe to finish
|
|
158
188
|
}
|
|
189
|
+
// Cleanup
|
|
190
|
+
resetRealtimeTranscribe();
|
|
159
191
|
recorder.stop();
|
|
160
192
|
} catch (Exception e) {
|
|
161
193
|
e.printStackTrace();
|
|
@@ -165,10 +197,79 @@ public class WhisperContext {
|
|
|
165
197
|
}
|
|
166
198
|
}
|
|
167
199
|
}).start();
|
|
168
|
-
|
|
169
200
|
return state;
|
|
170
201
|
}
|
|
171
202
|
|
|
203
|
+
private void fullTranscribeSamples(ReadableMap options, boolean skipCapturingCheck) {
|
|
204
|
+
int nSamplesOfIndex = sliceNSamples.get(transcribeSliceIndex);
|
|
205
|
+
|
|
206
|
+
if (!isCapturing && !skipCapturingCheck) return;
|
|
207
|
+
|
|
208
|
+
short[] shortBuffer = shortBufferSlices.get(transcribeSliceIndex);
|
|
209
|
+
int nSamples = sliceNSamples.get(transcribeSliceIndex);
|
|
210
|
+
|
|
211
|
+
nSamplesTranscribing = nSamplesOfIndex;
|
|
212
|
+
|
|
213
|
+
// convert I16 to F32
|
|
214
|
+
float[] nSamplesBuffer32 = new float[nSamplesTranscribing];
|
|
215
|
+
for (int i = 0; i < nSamplesTranscribing; i++) {
|
|
216
|
+
nSamplesBuffer32[i] = shortBuffer[i] / 32768.0f;
|
|
217
|
+
}
|
|
218
|
+
|
|
219
|
+
Log.d(NAME, "Start transcribing realtime: " + nSamplesTranscribing);
|
|
220
|
+
|
|
221
|
+
int timeStart = (int) System.currentTimeMillis();
|
|
222
|
+
int code = full(jobId, options, nSamplesBuffer32, nSamplesTranscribing);
|
|
223
|
+
int timeEnd = (int) System.currentTimeMillis();
|
|
224
|
+
int timeRecording = (int) (nSamplesTranscribing / SAMPLE_RATE * 1000);
|
|
225
|
+
|
|
226
|
+
WritableMap payload = Arguments.createMap();
|
|
227
|
+
payload.putInt("code", code);
|
|
228
|
+
payload.putInt("processTime", timeEnd - timeStart);
|
|
229
|
+
payload.putInt("recordingTime", timeRecording);
|
|
230
|
+
payload.putBoolean("isUseSlices", isUseSlices);
|
|
231
|
+
payload.putInt("sliceIndex", transcribeSliceIndex);
|
|
232
|
+
|
|
233
|
+
if (code == 0) {
|
|
234
|
+
payload.putMap("data", getTextSegments());
|
|
235
|
+
} else {
|
|
236
|
+
payload.putString("error", "Transcribe failed with code " + code);
|
|
237
|
+
}
|
|
238
|
+
|
|
239
|
+
nSamplesOfIndex = sliceNSamples.get(transcribeSliceIndex);
|
|
240
|
+
if (
|
|
241
|
+
isStoppedByAction ||
|
|
242
|
+
!isCapturing &&
|
|
243
|
+
nSamplesTranscribing == nSamplesOfIndex &&
|
|
244
|
+
sliceIndex == transcribeSliceIndex
|
|
245
|
+
) {
|
|
246
|
+
payload.putBoolean("isCapturing", false);
|
|
247
|
+
payload.putBoolean("isStoppedByAction", isStoppedByAction);
|
|
248
|
+
emitTranscribeEvent("@RNWhisper_onRealtimeTranscribeEnd", payload);
|
|
249
|
+
} else if (code == 0) {
|
|
250
|
+
payload.putBoolean("isCapturing", true);
|
|
251
|
+
emitTranscribeEvent("@RNWhisper_onRealtimeTranscribe", payload);
|
|
252
|
+
} else {
|
|
253
|
+
payload.putBoolean("isCapturing", true);
|
|
254
|
+
emitTranscribeEvent("@RNWhisper_onRealtimeTranscribe", payload);
|
|
255
|
+
}
|
|
256
|
+
|
|
257
|
+
if (
|
|
258
|
+
// If no more samples on current slice, move to next slice
|
|
259
|
+
nSamplesTranscribing == sliceNSamples.get(transcribeSliceIndex) &&
|
|
260
|
+
transcribeSliceIndex != sliceIndex
|
|
261
|
+
) {
|
|
262
|
+
transcribeSliceIndex++;
|
|
263
|
+
nSamplesTranscribing = 0;
|
|
264
|
+
}
|
|
265
|
+
|
|
266
|
+
if (!isCapturing && nSamplesTranscribing != nSamplesOfIndex) {
|
|
267
|
+
// If no more capturing, continue transcribing until all slices are transcribed
|
|
268
|
+
fullTranscribeSamples(options, true);
|
|
269
|
+
}
|
|
270
|
+
isTranscribing = false;
|
|
271
|
+
}
|
|
272
|
+
|
|
172
273
|
private void emitTranscribeEvent(final String eventName, final WritableMap payload) {
|
|
173
274
|
WritableMap event = Arguments.createMap();
|
|
174
275
|
event.putInt("contextId", WhisperContext.this.id);
|
|
@@ -194,8 +295,6 @@ public class WhisperContext {
|
|
|
194
295
|
return fullTranscribe(
|
|
195
296
|
jobId,
|
|
196
297
|
context,
|
|
197
|
-
// jboolean realtime,
|
|
198
|
-
isRealtime,
|
|
199
298
|
// float[] audio_data,
|
|
200
299
|
audioData,
|
|
201
300
|
// jint audio_data_len,
|
|
@@ -246,6 +345,7 @@ public class WhisperContext {
|
|
|
246
345
|
builder.append(text);
|
|
247
346
|
|
|
248
347
|
WritableMap segment = Arguments.createMap();
|
|
348
|
+
Log.d(NAME, "getTextSegments: " + text + " " + transcribeSliceIndex);
|
|
249
349
|
segment.putString("text", text);
|
|
250
350
|
segment.putInt("t0", getTextSegmentT0(context, i));
|
|
251
351
|
segment.putInt("t1", getTextSegmentT1(context, i));
|
|
@@ -269,6 +369,7 @@ public class WhisperContext {
|
|
|
269
369
|
abortTranscribe(jobId);
|
|
270
370
|
isCapturing = false;
|
|
271
371
|
isTranscribing = false;
|
|
372
|
+
isStoppedByAction = true;
|
|
272
373
|
}
|
|
273
374
|
|
|
274
375
|
public void stopCurrentTranscribe() {
|
|
@@ -371,7 +472,6 @@ public class WhisperContext {
|
|
|
371
472
|
protected static native int fullTranscribe(
|
|
372
473
|
int job_id,
|
|
373
474
|
long context,
|
|
374
|
-
boolean realtime,
|
|
375
475
|
float[] audio_data,
|
|
376
476
|
int audio_data_len,
|
|
377
477
|
int n_threads,
|
|
@@ -5,6 +5,7 @@
|
|
|
5
5
|
#include <cstdlib>
|
|
6
6
|
#include <sys/sysinfo.h>
|
|
7
7
|
#include <string>
|
|
8
|
+
#include <thread>
|
|
8
9
|
#include "whisper.h"
|
|
9
10
|
#include "rn-whisper.h"
|
|
10
11
|
#include "ggml.h"
|
|
@@ -38,7 +39,6 @@ Java_com_rnwhisper_WhisperContext_fullTranscribe(
|
|
|
38
39
|
jobject thiz,
|
|
39
40
|
jint job_id,
|
|
40
41
|
jlong context_ptr,
|
|
41
|
-
jboolean realtime,
|
|
42
42
|
jfloatArray audio_data,
|
|
43
43
|
jint audio_data_len,
|
|
44
44
|
jint n_threads,
|
|
@@ -61,7 +61,7 @@ Java_com_rnwhisper_WhisperContext_fullTranscribe(
|
|
|
61
61
|
struct whisper_context *context = reinterpret_cast<struct whisper_context *>(context_ptr);
|
|
62
62
|
jfloat *audio_data_arr = env->GetFloatArrayElements(audio_data, nullptr);
|
|
63
63
|
|
|
64
|
-
int max_threads = min(4,
|
|
64
|
+
int max_threads = min(4, std::thread::hardware_concurrency());
|
|
65
65
|
|
|
66
66
|
LOGI("About to create params");
|
|
67
67
|
|
|
@@ -83,7 +83,7 @@ Java_com_rnwhisper_WhisperContext_fullTranscribe(
|
|
|
83
83
|
params.speed_up = speed_up;
|
|
84
84
|
params.offset_ms = 0;
|
|
85
85
|
params.no_context = true;
|
|
86
|
-
params.single_segment =
|
|
86
|
+
params.single_segment = false;
|
|
87
87
|
|
|
88
88
|
if (max_len > -1) {
|
|
89
89
|
params.max_len = max_len;
|
package/ios/RNWhisperContext.h
CHANGED
|
@@ -18,10 +18,15 @@ typedef struct {
|
|
|
18
18
|
bool isTranscribing;
|
|
19
19
|
bool isRealtime;
|
|
20
20
|
bool isCapturing;
|
|
21
|
+
bool isStoppedByAction;
|
|
21
22
|
int maxAudioSec;
|
|
22
|
-
int
|
|
23
|
-
|
|
24
|
-
|
|
23
|
+
int nSamplesTranscribing;
|
|
24
|
+
NSMutableArray<NSValue *> *shortBufferSlices;
|
|
25
|
+
NSMutableArray<NSNumber *> *sliceNSamples;
|
|
26
|
+
bool isUseSlices;
|
|
27
|
+
int sliceIndex;
|
|
28
|
+
int transcribeSliceIndex;
|
|
29
|
+
int audioSliceSec;
|
|
25
30
|
|
|
26
31
|
AudioQueueRef queue;
|
|
27
32
|
AudioStreamBasicDescription dataFormat;
|
package/ios/RNWhisperContext.mm
CHANGED
|
@@ -27,13 +27,28 @@
|
|
|
27
27
|
self->recordState.dataFormat.mReserved = 0;
|
|
28
28
|
self->recordState.dataFormat.mFormatFlags = kLinearPCMFormatFlagIsSignedInteger;
|
|
29
29
|
|
|
30
|
-
self->recordState.nSamples = 0;
|
|
31
|
-
|
|
32
30
|
int maxAudioSecOpt = options[@"realtimeAudioSec"] != nil ? [options[@"realtimeAudioSec"] intValue] : 0;
|
|
33
31
|
int maxAudioSec = maxAudioSecOpt > 0 ? maxAudioSecOpt : DEFAULT_MAX_AUDIO_SEC;
|
|
34
32
|
self->recordState.maxAudioSec = maxAudioSec;
|
|
35
|
-
|
|
36
|
-
|
|
33
|
+
|
|
34
|
+
int realtimeAudioSliceSec = options[@"realtimeAudioSliceSec"] != nil ? [options[@"realtimeAudioSliceSec"] intValue] : 0;
|
|
35
|
+
int audioSliceSec = realtimeAudioSliceSec > 0 && realtimeAudioSliceSec < maxAudioSec ? realtimeAudioSliceSec : maxAudioSec;
|
|
36
|
+
|
|
37
|
+
self->recordState.audioSliceSec = audioSliceSec;
|
|
38
|
+
self->recordState.isUseSlices = audioSliceSec < maxAudioSec;
|
|
39
|
+
|
|
40
|
+
self->recordState.sliceIndex = 0;
|
|
41
|
+
self->recordState.transcribeSliceIndex = 0;
|
|
42
|
+
self->recordState.nSamplesTranscribing = 0;
|
|
43
|
+
|
|
44
|
+
[self freeBufferIfNeeded];
|
|
45
|
+
self->recordState.shortBufferSlices = [NSMutableArray new];
|
|
46
|
+
|
|
47
|
+
int16_t *audioBufferI16 = (int16_t *) malloc(audioSliceSec * WHISPER_SAMPLE_RATE * sizeof(int16_t));
|
|
48
|
+
[self->recordState.shortBufferSlices addObject:[NSValue valueWithPointer:audioBufferI16]];
|
|
49
|
+
|
|
50
|
+
self->recordState.sliceNSamples = [NSMutableArray new];
|
|
51
|
+
[self->recordState.sliceNSamples addObject:[NSNumber numberWithInt:0]];
|
|
37
52
|
|
|
38
53
|
self->recordState.isRealtime = true;
|
|
39
54
|
self->recordState.isTranscribing = false;
|
|
@@ -42,6 +57,16 @@
|
|
|
42
57
|
self->recordState.mSelf = self;
|
|
43
58
|
}
|
|
44
59
|
|
|
60
|
+
- (void)freeBufferIfNeeded {
|
|
61
|
+
if (self->recordState.shortBufferSlices != nil) {
|
|
62
|
+
for (int i = 0; i < [self->recordState.shortBufferSlices count]; i++) {
|
|
63
|
+
int16_t *audioBufferI16 = (int16_t *) [self->recordState.shortBufferSlices[i] pointerValue];
|
|
64
|
+
free(audioBufferI16);
|
|
65
|
+
}
|
|
66
|
+
self->recordState.shortBufferSlices = nil;
|
|
67
|
+
}
|
|
68
|
+
}
|
|
69
|
+
|
|
45
70
|
void AudioInputCallback(void * inUserData,
|
|
46
71
|
AudioQueueRef inAQ,
|
|
47
72
|
AudioQueueBufferRef inBuffer,
|
|
@@ -53,70 +78,147 @@ void AudioInputCallback(void * inUserData,
|
|
|
53
78
|
|
|
54
79
|
if (!state->isCapturing) {
|
|
55
80
|
NSLog(@"[RNWhisper] Not capturing, ignoring audio");
|
|
81
|
+
if (!state->isTranscribing) {
|
|
82
|
+
state->transcribeHandler(state->jobId, @"end", @{});
|
|
83
|
+
}
|
|
56
84
|
return;
|
|
57
85
|
}
|
|
58
86
|
|
|
87
|
+
int totalNSamples = 0;
|
|
88
|
+
for (int i = 0; i < [state->sliceNSamples count]; i++) {
|
|
89
|
+
totalNSamples += [[state->sliceNSamples objectAtIndex:i] intValue];
|
|
90
|
+
}
|
|
91
|
+
|
|
59
92
|
const int n = inBuffer->mAudioDataByteSize / 2;
|
|
60
|
-
NSLog(@"[RNWhisper] Captured %d new samples", n);
|
|
61
93
|
|
|
62
|
-
|
|
63
|
-
|
|
94
|
+
int nSamples = [state->sliceNSamples[state->sliceIndex] intValue];
|
|
95
|
+
|
|
96
|
+
if (totalNSamples + n > state->maxAudioSec * WHISPER_SAMPLE_RATE) {
|
|
97
|
+
NSLog(@"[RNWhisper] Audio buffer is full, stop capturing");
|
|
64
98
|
state->isCapturing = false;
|
|
65
|
-
|
|
99
|
+
[state->mSelf stopAudio];
|
|
100
|
+
if (
|
|
101
|
+
!state->isTranscribing &&
|
|
102
|
+
nSamples == state->nSamplesTranscribing &&
|
|
103
|
+
state->sliceIndex == state->transcribeSliceIndex
|
|
104
|
+
) {
|
|
66
105
|
state->transcribeHandler(state->jobId, @"end", @{});
|
|
106
|
+
} else if (
|
|
107
|
+
!state->isTranscribing &&
|
|
108
|
+
nSamples != state->nSamplesTranscribing
|
|
109
|
+
) {
|
|
110
|
+
state->isTranscribing = true;
|
|
111
|
+
dispatch_async(dispatch_get_global_queue(DISPATCH_QUEUE_PRIORITY_DEFAULT, 0), ^{
|
|
112
|
+
[state->mSelf fullTranscribeSamples:state];
|
|
113
|
+
});
|
|
67
114
|
}
|
|
68
|
-
[state->mSelf stopAudio];
|
|
69
115
|
return;
|
|
70
116
|
}
|
|
71
117
|
|
|
118
|
+
int audioSliceSec = state->audioSliceSec;
|
|
119
|
+
if (nSamples + n > audioSliceSec * WHISPER_SAMPLE_RATE) {
|
|
120
|
+
// next slice
|
|
121
|
+
state->sliceIndex++;
|
|
122
|
+
nSamples = 0;
|
|
123
|
+
int16_t* audioBufferI16 = (int16_t*) malloc(audioSliceSec * WHISPER_SAMPLE_RATE * sizeof(int16_t));
|
|
124
|
+
[state->shortBufferSlices addObject:[NSValue valueWithPointer:audioBufferI16]];
|
|
125
|
+
[state->sliceNSamples addObject:[NSNumber numberWithInt:0]];
|
|
126
|
+
}
|
|
127
|
+
|
|
128
|
+
// Append to buffer
|
|
129
|
+
NSLog(@"[RNWhisper] Slice %d has %d samples", state->sliceIndex, nSamples);
|
|
130
|
+
|
|
131
|
+
int16_t* audioBufferI16 = (int16_t*) [state->shortBufferSlices[state->sliceIndex] pointerValue];
|
|
72
132
|
for (int i = 0; i < n; i++) {
|
|
73
|
-
|
|
133
|
+
audioBufferI16[nSamples + i] = ((short*)inBuffer->mAudioData)[i];
|
|
74
134
|
}
|
|
75
|
-
|
|
135
|
+
nSamples += n;
|
|
136
|
+
state->sliceNSamples[state->sliceIndex] = [NSNumber numberWithInt:nSamples];
|
|
76
137
|
|
|
77
138
|
AudioQueueEnqueueBuffer(state->queue, inBuffer, 0, NULL);
|
|
78
139
|
|
|
79
140
|
if (!state->isTranscribing) {
|
|
80
141
|
state->isTranscribing = true;
|
|
81
142
|
dispatch_async(dispatch_get_global_queue(DISPATCH_QUEUE_PRIORITY_DEFAULT, 0), ^{
|
|
82
|
-
|
|
83
|
-
// convert I16 to F32
|
|
84
|
-
for (int i = 0; i < state->nSamples; i++) {
|
|
85
|
-
state->audioBufferF32[i] = (float)state->audioBufferI16[i] / 32768.0f;
|
|
86
|
-
}
|
|
87
|
-
CFTimeInterval timeStart = CACurrentMediaTime();
|
|
88
|
-
|
|
89
|
-
int code = [state->mSelf fullTranscribe:state->jobId audioData:state->audioBufferF32 audioDataCount:state->nSamples options:state->options];
|
|
90
|
-
|
|
91
|
-
CFTimeInterval timeEnd = CACurrentMediaTime();
|
|
92
|
-
const float timeRecording = (float) state->nSamples / (float) state->dataFormat.mSampleRate;
|
|
93
|
-
if (code == 0) {
|
|
94
|
-
state->transcribeHandler(state->jobId, @"transcribe", @{
|
|
95
|
-
@"isCapturing": @(state->isCapturing),
|
|
96
|
-
@"code": [NSNumber numberWithInt:code],
|
|
97
|
-
@"data": [state->mSelf getTextSegments],
|
|
98
|
-
@"processTime": [NSNumber numberWithInt:(timeEnd - timeStart) * 1E3],
|
|
99
|
-
@"recordingTime": [NSNumber numberWithInt:timeRecording * 1E3],
|
|
100
|
-
});
|
|
101
|
-
state->isTranscribing = false;
|
|
102
|
-
return;
|
|
103
|
-
}
|
|
104
|
-
state->transcribeHandler(state->jobId, @"transcribe", @{
|
|
105
|
-
@"isCapturing": @(state->isCapturing),
|
|
106
|
-
@"code": [NSNumber numberWithInt:code],
|
|
107
|
-
@"error": [NSString stringWithFormat:@"Transcribe failed with code %d", code],
|
|
108
|
-
@"processTime": [NSNumber numberWithDouble:timeEnd - timeStart],
|
|
109
|
-
@"recordingTime": [NSNumber numberWithFloat:timeRecording],
|
|
110
|
-
});
|
|
111
|
-
if (!state->isCapturing) {
|
|
112
|
-
NSLog(@"[RNWhisper] Transcribe end");
|
|
113
|
-
state->transcribeHandler(state->jobId, @"end", @{});
|
|
114
|
-
}
|
|
115
|
-
state->isTranscribing = false;
|
|
143
|
+
[state->mSelf fullTranscribeSamples:state];
|
|
116
144
|
});
|
|
117
145
|
}
|
|
118
146
|
}
|
|
119
147
|
|
|
148
|
+
- (void)fullTranscribeSamples:(RNWhisperContextRecordState*) state {
|
|
149
|
+
int nSamplesOfIndex = [[state->sliceNSamples objectAtIndex:state->transcribeSliceIndex] intValue];
|
|
150
|
+
state->nSamplesTranscribing = nSamplesOfIndex;
|
|
151
|
+
NSLog(@"[RNWhisper] Transcribing %d samples", state->nSamplesTranscribing);
|
|
152
|
+
|
|
153
|
+
int16_t* audioBufferI16 = (int16_t*) [state->shortBufferSlices[state->transcribeSliceIndex] pointerValue];
|
|
154
|
+
float* audioBufferF32 = (float*) malloc(state->nSamplesTranscribing * sizeof(float));
|
|
155
|
+
// convert I16 to F32
|
|
156
|
+
for (int i = 0; i < state->nSamplesTranscribing; i++) {
|
|
157
|
+
audioBufferF32[i] = (float)audioBufferI16[i] / 32768.0f;
|
|
158
|
+
}
|
|
159
|
+
CFTimeInterval timeStart = CACurrentMediaTime();
|
|
160
|
+
int code = [state->mSelf fullTranscribe:state->jobId audioData:audioBufferF32 audioDataCount:state->nSamplesTranscribing options:state->options];
|
|
161
|
+
free(audioBufferF32);
|
|
162
|
+
CFTimeInterval timeEnd = CACurrentMediaTime();
|
|
163
|
+
const float timeRecording = (float) state->nSamplesTranscribing / (float) state->dataFormat.mSampleRate;
|
|
164
|
+
|
|
165
|
+
NSDictionary* base = @{
|
|
166
|
+
@"code": [NSNumber numberWithInt:code],
|
|
167
|
+
@"processTime": [NSNumber numberWithInt:(timeEnd - timeStart) * 1E3],
|
|
168
|
+
@"recordingTime": [NSNumber numberWithInt:timeRecording * 1E3],
|
|
169
|
+
@"isUseSlices": @(state->isUseSlices),
|
|
170
|
+
@"sliceIndex": @(state->transcribeSliceIndex),
|
|
171
|
+
};
|
|
172
|
+
|
|
173
|
+
NSMutableDictionary* result = [base mutableCopy];
|
|
174
|
+
|
|
175
|
+
if (code == 0) {
|
|
176
|
+
result[@"data"] = [state->mSelf getTextSegments];
|
|
177
|
+
} else {
|
|
178
|
+
result[@"error"] = [NSString stringWithFormat:@"Transcribe failed with code %d", code];
|
|
179
|
+
}
|
|
180
|
+
|
|
181
|
+
nSamplesOfIndex = [[state->sliceNSamples objectAtIndex:state->transcribeSliceIndex] intValue];
|
|
182
|
+
if (
|
|
183
|
+
state->isStoppedByAction ||
|
|
184
|
+
(
|
|
185
|
+
!state->isCapturing &&
|
|
186
|
+
state->nSamplesTranscribing == nSamplesOfIndex &&
|
|
187
|
+
state->sliceIndex == state->transcribeSliceIndex
|
|
188
|
+
)
|
|
189
|
+
) {
|
|
190
|
+
NSLog(@"[RNWhisper] Transcribe end");
|
|
191
|
+
result[@"isStoppedByAction"] = @(state->isStoppedByAction);
|
|
192
|
+
result[@"isCapturing"] = @(false);
|
|
193
|
+
state->transcribeHandler(state->jobId, @"end", result);
|
|
194
|
+
} else if (code == 0) {
|
|
195
|
+
result[@"isCapturing"] = @(true);
|
|
196
|
+
state->transcribeHandler(state->jobId, @"transcribe", result);
|
|
197
|
+
} else {
|
|
198
|
+
result[@"isCapturing"] = @(true);
|
|
199
|
+
state->transcribeHandler(state->jobId, @"transcribe", result);
|
|
200
|
+
}
|
|
201
|
+
|
|
202
|
+
if (
|
|
203
|
+
// If no more samples on current slice, move to next slice
|
|
204
|
+
state->nSamplesTranscribing == nSamplesOfIndex &&
|
|
205
|
+
state->transcribeSliceIndex != state->sliceIndex
|
|
206
|
+
) {
|
|
207
|
+
state->transcribeSliceIndex++;
|
|
208
|
+
state->nSamplesTranscribing = 0;
|
|
209
|
+
}
|
|
210
|
+
|
|
211
|
+
if (
|
|
212
|
+
!state->isCapturing &&
|
|
213
|
+
state->nSamplesTranscribing != nSamplesOfIndex
|
|
214
|
+
) {
|
|
215
|
+
state->isTranscribing = true;
|
|
216
|
+
// Finish transcribing the rest of the samples
|
|
217
|
+
[self fullTranscribeSamples:state];
|
|
218
|
+
}
|
|
219
|
+
state->isTranscribing = false;
|
|
220
|
+
}
|
|
221
|
+
|
|
120
222
|
- (bool)isCapturing {
|
|
121
223
|
return self->recordState.isCapturing;
|
|
122
224
|
}
|
|
@@ -132,7 +234,6 @@ void AudioInputCallback(void * inUserData,
|
|
|
132
234
|
self->recordState.transcribeHandler = onTranscribe;
|
|
133
235
|
self->recordState.jobId = jobId;
|
|
134
236
|
[self prepareRealtime:options];
|
|
135
|
-
self->recordState.nSamples = 0;
|
|
136
237
|
|
|
137
238
|
OSStatus status = AudioQueueNewInput(
|
|
138
239
|
&self->recordState.dataFormat,
|
|
@@ -184,6 +285,7 @@ void AudioInputCallback(void * inUserData,
|
|
|
184
285
|
return;
|
|
185
286
|
}
|
|
186
287
|
self->recordState.isCapturing = false;
|
|
288
|
+
self->recordState.isStoppedByAction = true;
|
|
187
289
|
[self stopAudio];
|
|
188
290
|
}
|
|
189
291
|
|
|
@@ -216,7 +318,7 @@ void AudioInputCallback(void * inUserData,
|
|
|
216
318
|
params.n_threads = max_threads;
|
|
217
319
|
params.offset_ms = 0;
|
|
218
320
|
params.no_context = true;
|
|
219
|
-
params.single_segment =
|
|
321
|
+
params.single_segment = false;
|
|
220
322
|
|
|
221
323
|
if (options[@"maxLen"] != nil) {
|
|
222
324
|
params.max_len = [options[@"maxLen"] intValue];
|
|
@@ -298,6 +400,7 @@ void AudioInputCallback(void * inUserData,
|
|
|
298
400
|
- (void)invalidate {
|
|
299
401
|
[self stopCurrentTranscribe];
|
|
300
402
|
whisper_free(self->ctx);
|
|
403
|
+
[self freeBufferIfNeeded];
|
|
301
404
|
}
|
|
302
405
|
|
|
303
406
|
@end
|
package/jest/mock.js
CHANGED
|
@@ -23,7 +23,6 @@ if (!NativeModules.RNWhisper) {
|
|
|
23
23
|
recordingTime: 1000,
|
|
24
24
|
},
|
|
25
25
|
})
|
|
26
|
-
// End
|
|
27
26
|
DeviceEventEmitter.emit('@RNWhisper_onRealtimeTranscribe', {
|
|
28
27
|
contextId,
|
|
29
28
|
jobId,
|
|
@@ -37,6 +36,12 @@ if (!NativeModules.RNWhisper) {
|
|
|
37
36
|
recordingTime: 2000,
|
|
38
37
|
},
|
|
39
38
|
})
|
|
39
|
+
// End event
|
|
40
|
+
DeviceEventEmitter.emit('@RNWhisper_onRealtimeTranscribeEnd', {
|
|
41
|
+
contextId,
|
|
42
|
+
jobId,
|
|
43
|
+
payload: {},
|
|
44
|
+
})
|
|
40
45
|
})
|
|
41
46
|
}),
|
|
42
47
|
releaseContext: jest.fn(() => Promise.resolve()),
|
package/lib/commonjs/index.js
CHANGED
|
@@ -44,9 +44,52 @@ class WhisperContext {
|
|
|
44
44
|
let options = arguments.length > 0 && arguments[0] !== undefined ? arguments[0] : {};
|
|
45
45
|
const jobId = Math.floor(Math.random() * 10000);
|
|
46
46
|
await RNWhisper.startRealtimeTranscribe(this.id, jobId, options);
|
|
47
|
-
let removeTranscribe;
|
|
48
|
-
let removeEnd;
|
|
49
47
|
let lastTranscribePayload;
|
|
48
|
+
const slices = [];
|
|
49
|
+
let sliceIndex = 0;
|
|
50
|
+
let tOffset = 0;
|
|
51
|
+
const putSlice = payload => {
|
|
52
|
+
if (!payload.isUseSlices) return;
|
|
53
|
+
if (sliceIndex !== payload.sliceIndex) {
|
|
54
|
+
var _slices$sliceIndex, _segments;
|
|
55
|
+
const {
|
|
56
|
+
segments = []
|
|
57
|
+
} = ((_slices$sliceIndex = slices[sliceIndex]) === null || _slices$sliceIndex === void 0 ? void 0 : _slices$sliceIndex.data) || {};
|
|
58
|
+
tOffset = ((_segments = segments[segments.length - 1]) === null || _segments === void 0 ? void 0 : _segments.t1) || 0;
|
|
59
|
+
}
|
|
60
|
+
({
|
|
61
|
+
sliceIndex
|
|
62
|
+
} = payload);
|
|
63
|
+
slices[sliceIndex] = {
|
|
64
|
+
...payload,
|
|
65
|
+
data: payload.data ? {
|
|
66
|
+
...payload.data,
|
|
67
|
+
segments: payload.data.segments.map(segment => ({
|
|
68
|
+
...segment,
|
|
69
|
+
t0: segment.t0 + tOffset,
|
|
70
|
+
t1: segment.t1 + tOffset
|
|
71
|
+
})) || []
|
|
72
|
+
} : undefined
|
|
73
|
+
};
|
|
74
|
+
};
|
|
75
|
+
const mergeSlicesIfNeeded = payload => {
|
|
76
|
+
if (!payload.isUseSlices) return payload;
|
|
77
|
+
const mergedPayload = {};
|
|
78
|
+
slices.forEach(slice => {
|
|
79
|
+
var _mergedPayload$data, _slice$data, _mergedPayload$data2, _slice$data2;
|
|
80
|
+
mergedPayload.data = {
|
|
81
|
+
result: (((_mergedPayload$data = mergedPayload.data) === null || _mergedPayload$data === void 0 ? void 0 : _mergedPayload$data.result) || '') + (((_slice$data = slice.data) === null || _slice$data === void 0 ? void 0 : _slice$data.result) || ''),
|
|
82
|
+
segments: [...((mergedPayload === null || mergedPayload === void 0 ? void 0 : (_mergedPayload$data2 = mergedPayload.data) === null || _mergedPayload$data2 === void 0 ? void 0 : _mergedPayload$data2.segments) || []), ...(((_slice$data2 = slice.data) === null || _slice$data2 === void 0 ? void 0 : _slice$data2.segments) || [])]
|
|
83
|
+
};
|
|
84
|
+
mergedPayload.processTime = slice.processTime;
|
|
85
|
+
mergedPayload.recordingTime = ((mergedPayload === null || mergedPayload === void 0 ? void 0 : mergedPayload.recordingTime) || 0) + slice.recordingTime;
|
|
86
|
+
});
|
|
87
|
+
return {
|
|
88
|
+
...payload,
|
|
89
|
+
...mergedPayload,
|
|
90
|
+
slices
|
|
91
|
+
};
|
|
92
|
+
};
|
|
50
93
|
return {
|
|
51
94
|
stop: () => RNWhisper.abortTranscribe(this.id, jobId),
|
|
52
95
|
subscribe: callback => {
|
|
@@ -57,30 +100,33 @@ class WhisperContext {
|
|
|
57
100
|
} = evt;
|
|
58
101
|
if (contextId !== this.id || evt.jobId !== jobId) return;
|
|
59
102
|
lastTranscribePayload = payload;
|
|
103
|
+
putSlice(payload);
|
|
60
104
|
callback({
|
|
61
105
|
contextId,
|
|
62
106
|
jobId: evt.jobId,
|
|
63
|
-
...payload
|
|
107
|
+
...mergeSlicesIfNeeded(payload)
|
|
64
108
|
});
|
|
65
|
-
if (!payload.isCapturing) removeTranscribe();
|
|
66
109
|
});
|
|
67
|
-
removeTranscribe = transcribeListener.remove;
|
|
68
110
|
const endListener = EventEmitter.addListener(EVENT_ON_REALTIME_TRANSCRIBE_END, evt => {
|
|
69
|
-
var _removeTranscribe;
|
|
70
111
|
const {
|
|
71
|
-
contextId
|
|
112
|
+
contextId,
|
|
113
|
+
payload
|
|
72
114
|
} = evt;
|
|
73
115
|
if (contextId !== this.id || evt.jobId !== jobId) return;
|
|
116
|
+
const lastPayload = {
|
|
117
|
+
...lastTranscribePayload,
|
|
118
|
+
...payload
|
|
119
|
+
};
|
|
120
|
+
putSlice(lastPayload);
|
|
74
121
|
callback({
|
|
75
122
|
contextId,
|
|
76
123
|
jobId: evt.jobId,
|
|
77
|
-
...
|
|
124
|
+
...mergeSlicesIfNeeded(lastPayload),
|
|
78
125
|
isCapturing: false
|
|
79
126
|
});
|
|
80
|
-
(
|
|
81
|
-
|
|
127
|
+
transcribeListener.remove();
|
|
128
|
+
endListener.remove();
|
|
82
129
|
});
|
|
83
|
-
removeEnd = endListener.remove;
|
|
84
130
|
}
|
|
85
131
|
};
|
|
86
132
|
}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"names":["_reactNative","require","LINKING_ERROR","Platform","select","ios","default","RNWhisper","NativeModules","Proxy","get","Error","EventEmitter","OS","NativeEventEmitter","DeviceEventEmitter","EVENT_ON_REALTIME_TRANSCRIBE","EVENT_ON_REALTIME_TRANSCRIBE_END","WhisperContext","constructor","id","transcribe","path","options","arguments","length","undefined","jobId","Math","floor","random","stop","abortTranscribe","promise","transcribeFile","transcribeRealtime","startRealtimeTranscribe","
|
|
1
|
+
{"version":3,"names":["_reactNative","require","LINKING_ERROR","Platform","select","ios","default","RNWhisper","NativeModules","Proxy","get","Error","EventEmitter","OS","NativeEventEmitter","DeviceEventEmitter","EVENT_ON_REALTIME_TRANSCRIBE","EVENT_ON_REALTIME_TRANSCRIBE_END","WhisperContext","constructor","id","transcribe","path","options","arguments","length","undefined","jobId","Math","floor","random","stop","abortTranscribe","promise","transcribeFile","transcribeRealtime","startRealtimeTranscribe","lastTranscribePayload","slices","sliceIndex","tOffset","putSlice","payload","isUseSlices","_slices$sliceIndex","_segments","segments","data","t1","map","segment","t0","mergeSlicesIfNeeded","mergedPayload","forEach","slice","_mergedPayload$data","_slice$data","_mergedPayload$data2","_slice$data2","result","processTime","recordingTime","subscribe","callback","transcribeListener","addListener","evt","contextId","endListener","lastPayload","isCapturing","remove","release","releaseContext","initWhisper","filePath","initContext","releaseAllWhisper","releaseAllContexts"],"sourceRoot":"../../src","sources":["index.tsx"],"mappings":";;;;;;;AAAA,IAAAA,YAAA,GAAAC,OAAA;AAQA,MAAMC,aAAa,GAChB,sEAAqEC,qBAAQ,CAACC,MAAM,CAAC;EAAEC,GAAG,EAAE,gCAAgC;EAAEC,OAAO,EAAE;AAAG,CAAC,CAC3I,oDAAmD;AAEtD,MAAMC,SAAS,GAAGC,0BAAa,CAACD,SAAS,GACrCC,0BAAa,CAACD,SAAS,GACvB,IAAIE,KAAK,CACT,CAAC,CAAC,EACF;EACEC,GAAGA,CAAA,EAAG;IACJ,MAAM,IAAIC,KAAK,CAACT,aAAa,CAAC;EAChC;AACF,CAAC,CACF;AAEH,IAAIU,YAA2D;AAC/D,IAAIT,qBAAQ,CAACU,EAAE,KAAK,KAAK,EAAE;EACzBD,YAAY,GAAG,IAAIE,+BAAkB,CAACP,SAAS,CAAC;AAClD;AACA,IAAIJ,qBAAQ,CAACU,EAAE,KAAK,SAAS,EAAE;EAC7BD,YAAY,GAAGG,+BAAkB;AACnC;AAEA,MAAMC,4BAA4B,GAAG,iCAAiC;AACtE,MAAMC,gCAAgC,GAAG,oCAAoC;AAiG7E,MAAMC,cAAc,CAAC;EAGnBC,WAAWA,CAACC,EAAU,EAAE;IACtB,IAAI,CAACA,EAAE,GAAGA,EAAE;EACd;;EAEA;EACAC,UAAUA,CAACC,IAAY,EAKrB;IAAA,IALuBC,OAA0B,GAAAC,SAAA,CAAAC,MAAA,QAAAD,SAAA,QAAAE,SAAA,GAAAF,SAAA,MAAG,CAAC,CAAC;IAMtD,MAAMG,KAAa,GAAGC,IAAI,CAACC,KAAK,CAACD,IAAI,CAACE,MAAM,EAAE,GAAG
,KAAK,CAAC;IACvD,OAAO;MACLC,IAAI,EAAEA,CAAA,KAAMxB,SAAS,CAACyB,eAAe,CAAC,IAAI,CAACZ,EAAE,EAAEO,KAAK,CAAC;MACrDM,OAAO,EAAE1B,SAAS,CAAC2B,cAAc,CAAC,IAAI,CAACd,EAAE,EAAEO,KAAK,EAAEL,IAAI,EAAEC,OAAO;IACjE,CAAC;EACH;;EAEA;EACA,MAAMY,kBAAkBA,CAAA,EAKrB;IAAA,IALsBZ,OAAkC,GAAAC,SAAA,CAAAC,MAAA,QAAAD,SAAA,QAAAE,SAAA,GAAAF,SAAA,MAAG,CAAC,CAAC;IAM9D,MAAMG,KAAa,GAAGC,IAAI,CAACC,KAAK,CAACD,IAAI,CAACE,MAAM,EAAE,GAAG,KAAK,CAAC;IACvD,MAAMvB,SAAS,CAAC6B,uBAAuB,CAAC,IAAI,CAAChB,EAAE,EAAEO,KAAK,EAAEJ,OAAO,CAAC;IAChE,IAAIc,qBAAsD;IAE1D,MAAMC,MAAyC,GAAG,EAAE;IACpD,IAAIC,UAAkB,GAAG,CAAC;IAC1B,IAAIC,OAAe,GAAG,CAAC;IAEvB,MAAMC,QAAQ,GAAIC,OAAwC,IAAK;MAC7D,IAAI,CAACA,OAAO,CAACC,WAAW,EAAE;MAC1B,IAAIJ,UAAU,KAAKG,OAAO,CAACH,UAAU,EAAE;QAAA,IAAAK,kBAAA,EAAAC,SAAA;QACrC,MAAM;UAAEC,QAAQ,GAAG;QAAG,CAAC,GAAG,EAAAF,kBAAA,GAAAN,MAAM,CAACC,UAAU,CAAC,cAAAK,kBAAA,uBAAlBA,kBAAA,CAAoBG,IAAI,KAAI,CAAC,CAAC;QACxDP,OAAO,GAAG,EAAAK,SAAA,GAAAC,QAAQ,CAACA,QAAQ,CAACrB,MAAM,GAAG,CAAC,CAAC,cAAAoB,SAAA,uBAA7BA,SAAA,CAA+BG,EAAE,KAAI,CAAC;MAClD;MACA,CAAC;QAAET;MAAW,CAAC,GAAGG,OAAO;MACzBJ,MAAM,CAACC,UAAU,CAAC,GAAG;QACnB,GAAGG,OAAO;QACVK,IAAI,EAAEL,OAAO,CAACK,IAAI,GAAG;UACnB,GAAGL,OAAO,CAACK,IAAI;UACfD,QAAQ,EAAEJ,OAAO,CAACK,IAAI,CAACD,QAAQ,CAACG,GAAG,CAAEC,OAAO,KAAM;YAChD,GAAGA,OAAO;YACVC,EAAE,EAAED,OAAO,CAACC,EAAE,GAAGX,OAAO;YACxBQ,EAAE,EAAEE,OAAO,CAACF,EAAE,GAAGR;UACnB,CAAC,CAAC,CAAC,IAAI;QACT,CAAC,GAAGd;MACN,CAAC;IACH,CAAC;IAED,MAAM0B,mBAAmB,GAAIV,OAAwC,IAAsC;MACzG,IAAI,CAACA,OAAO,CAACC,WAAW,EAAE,OAAOD,OAAO;MAExC,MAAMW,aAAkB,GAAG,CAAC,CAAC;MAC7Bf,MAAM,CAACgB,OAAO,CACXC,KAAK,IAAK;QAAA,IAAAC,mBAAA,EAAAC,WAAA,EAAAC,oBAAA,EAAAC,YAAA;QACTN,aAAa,CAACN,IAAI,GAAG;UACnBa,MAAM,EAAE,CAAC,EAAAJ,mBAAA,GAAAH,aAAa,CAACN,IAAI,cAAAS,mBAAA,uBAAlBA,mBAAA,CAAoBI,MAAM,KAAI,EAAE,KAAK,EAAAH,WAAA,GAAAF,KAAK,CAACR,IAAI,cAAAU,WAAA,uBAAVA,WAAA,CAAYG,MAAM,KAAI,EAAE,CAAC;UACvEd,QAAQ,EAAE,CACR,IAAI,CAAAO,aAAa,aAAbA,aAAa,wBAAAK,oBAAA,GAAbL,aAAa,CAAEN,IAAI,cAAAW,oBAAA,uBAAnBA,oBAAA,CAAqBZ,QAAQ,KAAI,EAAE,CAAC,EACxC,IAAI,EAAAa,YAAA,GAAAJ,KAAK,CAACR,IA
AI,cAAAY,YAAA,uBAAVA,YAAA,CAAYb,QAAQ,KAAI,EAAE,CAAC;QAEnC,CAAC;QACDO,aAAa,CAACQ,WAAW,GAAGN,KAAK,CAACM,WAAW;QAC7CR,aAAa,CAACS,aAAa,GAAG,CAAC,CAAAT,aAAa,aAAbA,aAAa,uBAAbA,aAAa,CAAES,aAAa,KAAI,CAAC,IAAIP,KAAK,CAACO,aAAa;MACzF,CAAC,CACF;MACD,OAAO;QAAE,GAAGpB,OAAO;QAAE,GAAGW,aAAa;QAAEf;MAAO,CAAC;IACjD,CAAC;IAED,OAAO;MACLP,IAAI,EAAEA,CAAA,KAAMxB,SAAS,CAACyB,eAAe,CAAC,IAAI,CAACZ,EAAE,EAAEO,KAAK,CAAC;MACrDoC,SAAS,EAAGC,QAAkD,IAAK;QACjE,MAAMC,kBAAkB,GAAGrD,YAAY,CAACsD,WAAW,CACjDlD,4BAA4B,EAC3BmD,GAAkC,IAAK;UACtC,MAAM;YAAEC,SAAS;YAAE1B;UAAQ,CAAC,GAAGyB,GAAG;UAClC,IAAIC,SAAS,KAAK,IAAI,CAAChD,EAAE,IAAI+C,GAAG,CAACxC,KAAK,KAAKA,KAAK,EAAE;UAClDU,qBAAqB,GAAGK,OAAO;UAC/BD,QAAQ,CAACC,OAAO,CAAC;UACjBsB,QAAQ,CAAC;YACPI,SAAS;YACTzC,KAAK,EAAEwC,GAAG,CAACxC,KAAK;YAChB,GAAGyB,mBAAmB,CAACV,OAAO;UAChC,CAAC,CAAC;QACJ,CAAC,CACF;QACD,MAAM2B,WAAW,GAAGzD,YAAY,CAACsD,WAAW,CAC1CjD,gCAAgC,EAC/BkD,GAAkC,IAAK;UACtC,MAAM;YAAEC,SAAS;YAAE1B;UAAQ,CAAC,GAAGyB,GAAG;UAClC,IAAIC,SAAS,KAAK,IAAI,CAAChD,EAAE,IAAI+C,GAAG,CAACxC,KAAK,KAAKA,KAAK,EAAE;UAClD,MAAM2C,WAAW,GAAG;YAClB,GAAGjC,qBAAqB;YACxB,GAAGK;UACL,CAAC;UACDD,QAAQ,CAAC6B,WAAW,CAAC;UACrBN,QAAQ,CAAC;YACPI,SAAS;YACTzC,KAAK,EAAEwC,GAAG,CAACxC,KAAK;YAChB,GAAGyB,mBAAmB,CAACkB,WAAW,CAAC;YACnCC,WAAW,EAAE;UACf,CAAC,CAAC;UACFN,kBAAkB,CAACO,MAAM,EAAE;UAC3BH,WAAW,CAACG,MAAM,EAAE;QACtB,CAAC,CACF;MACH;IACF,CAAC;EACH;EAEA,MAAMC,OAAOA,CAAA,EAAG;IACd,OAAOlE,SAAS,CAACmE,cAAc,CAAC,IAAI,CAACtD,EAAE,CAAC;EAC1C;AACF;AAEO,eAAeuD,WAAWA,CAAA,EAEN;EAAA,IADzB;IAAEC;EAAgC,CAAC,GAAApD,SAAA,CAAAC,MAAA,QAAAD,SAAA,QAAAE,SAAA,GAAAF,SAAA,MAAG,CAAC,CAAC;EAExC,MAAMJ,EAAE,GAAG,MAAMb,SAAS,CAACsE,WAAW,CAACD,QAAQ,CAAC;EAChD,OAAO,IAAI1D,cAAc,CAACE,EAAE,CAAC;AAC/B;AAEO,eAAe0D,iBAAiBA,CAAA,EAAkB;EACvD,OAAOvE,SAAS,CAACwE,kBAAkB,EAAE;AACvC"}
|
package/lib/module/index.js
CHANGED
|
@@ -37,9 +37,52 @@ class WhisperContext {
|
|
|
37
37
|
let options = arguments.length > 0 && arguments[0] !== undefined ? arguments[0] : {};
|
|
38
38
|
const jobId = Math.floor(Math.random() * 10000);
|
|
39
39
|
await RNWhisper.startRealtimeTranscribe(this.id, jobId, options);
|
|
40
|
-
let removeTranscribe;
|
|
41
|
-
let removeEnd;
|
|
42
40
|
let lastTranscribePayload;
|
|
41
|
+
const slices = [];
|
|
42
|
+
let sliceIndex = 0;
|
|
43
|
+
let tOffset = 0;
|
|
44
|
+
const putSlice = payload => {
|
|
45
|
+
if (!payload.isUseSlices) return;
|
|
46
|
+
if (sliceIndex !== payload.sliceIndex) {
|
|
47
|
+
var _slices$sliceIndex, _segments;
|
|
48
|
+
const {
|
|
49
|
+
segments = []
|
|
50
|
+
} = ((_slices$sliceIndex = slices[sliceIndex]) === null || _slices$sliceIndex === void 0 ? void 0 : _slices$sliceIndex.data) || {};
|
|
51
|
+
tOffset = ((_segments = segments[segments.length - 1]) === null || _segments === void 0 ? void 0 : _segments.t1) || 0;
|
|
52
|
+
}
|
|
53
|
+
({
|
|
54
|
+
sliceIndex
|
|
55
|
+
} = payload);
|
|
56
|
+
slices[sliceIndex] = {
|
|
57
|
+
...payload,
|
|
58
|
+
data: payload.data ? {
|
|
59
|
+
...payload.data,
|
|
60
|
+
segments: payload.data.segments.map(segment => ({
|
|
61
|
+
...segment,
|
|
62
|
+
t0: segment.t0 + tOffset,
|
|
63
|
+
t1: segment.t1 + tOffset
|
|
64
|
+
})) || []
|
|
65
|
+
} : undefined
|
|
66
|
+
};
|
|
67
|
+
};
|
|
68
|
+
const mergeSlicesIfNeeded = payload => {
|
|
69
|
+
if (!payload.isUseSlices) return payload;
|
|
70
|
+
const mergedPayload = {};
|
|
71
|
+
slices.forEach(slice => {
|
|
72
|
+
var _mergedPayload$data, _slice$data, _mergedPayload$data2, _slice$data2;
|
|
73
|
+
mergedPayload.data = {
|
|
74
|
+
result: (((_mergedPayload$data = mergedPayload.data) === null || _mergedPayload$data === void 0 ? void 0 : _mergedPayload$data.result) || '') + (((_slice$data = slice.data) === null || _slice$data === void 0 ? void 0 : _slice$data.result) || ''),
|
|
75
|
+
segments: [...((mergedPayload === null || mergedPayload === void 0 ? void 0 : (_mergedPayload$data2 = mergedPayload.data) === null || _mergedPayload$data2 === void 0 ? void 0 : _mergedPayload$data2.segments) || []), ...(((_slice$data2 = slice.data) === null || _slice$data2 === void 0 ? void 0 : _slice$data2.segments) || [])]
|
|
76
|
+
};
|
|
77
|
+
mergedPayload.processTime = slice.processTime;
|
|
78
|
+
mergedPayload.recordingTime = ((mergedPayload === null || mergedPayload === void 0 ? void 0 : mergedPayload.recordingTime) || 0) + slice.recordingTime;
|
|
79
|
+
});
|
|
80
|
+
return {
|
|
81
|
+
...payload,
|
|
82
|
+
...mergedPayload,
|
|
83
|
+
slices
|
|
84
|
+
};
|
|
85
|
+
};
|
|
43
86
|
return {
|
|
44
87
|
stop: () => RNWhisper.abortTranscribe(this.id, jobId),
|
|
45
88
|
subscribe: callback => {
|
|
@@ -50,30 +93,33 @@ class WhisperContext {
|
|
|
50
93
|
} = evt;
|
|
51
94
|
if (contextId !== this.id || evt.jobId !== jobId) return;
|
|
52
95
|
lastTranscribePayload = payload;
|
|
96
|
+
putSlice(payload);
|
|
53
97
|
callback({
|
|
54
98
|
contextId,
|
|
55
99
|
jobId: evt.jobId,
|
|
56
|
-
...payload
|
|
100
|
+
...mergeSlicesIfNeeded(payload)
|
|
57
101
|
});
|
|
58
|
-
if (!payload.isCapturing) removeTranscribe();
|
|
59
102
|
});
|
|
60
|
-
removeTranscribe = transcribeListener.remove;
|
|
61
103
|
const endListener = EventEmitter.addListener(EVENT_ON_REALTIME_TRANSCRIBE_END, evt => {
|
|
62
|
-
var _removeTranscribe;
|
|
63
104
|
const {
|
|
64
|
-
contextId
|
|
105
|
+
contextId,
|
|
106
|
+
payload
|
|
65
107
|
} = evt;
|
|
66
108
|
if (contextId !== this.id || evt.jobId !== jobId) return;
|
|
109
|
+
const lastPayload = {
|
|
110
|
+
...lastTranscribePayload,
|
|
111
|
+
...payload
|
|
112
|
+
};
|
|
113
|
+
putSlice(lastPayload);
|
|
67
114
|
callback({
|
|
68
115
|
contextId,
|
|
69
116
|
jobId: evt.jobId,
|
|
70
|
-
...
|
|
117
|
+
...mergeSlicesIfNeeded(lastPayload),
|
|
71
118
|
isCapturing: false
|
|
72
119
|
});
|
|
73
|
-
(
|
|
74
|
-
|
|
120
|
+
transcribeListener.remove();
|
|
121
|
+
endListener.remove();
|
|
75
122
|
});
|
|
76
|
-
removeEnd = endListener.remove;
|
|
77
123
|
}
|
|
78
124
|
};
|
|
79
125
|
}
|
package/lib/module/index.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"names":["NativeEventEmitter","DeviceEventEmitter","NativeModules","Platform","LINKING_ERROR","select","ios","default","RNWhisper","Proxy","get","Error","EventEmitter","OS","EVENT_ON_REALTIME_TRANSCRIBE","EVENT_ON_REALTIME_TRANSCRIBE_END","WhisperContext","constructor","id","transcribe","path","options","arguments","length","undefined","jobId","Math","floor","random","stop","abortTranscribe","promise","transcribeFile","transcribeRealtime","startRealtimeTranscribe","
|
|
1
|
+
{"version":3,"names":["NativeEventEmitter","DeviceEventEmitter","NativeModules","Platform","LINKING_ERROR","select","ios","default","RNWhisper","Proxy","get","Error","EventEmitter","OS","EVENT_ON_REALTIME_TRANSCRIBE","EVENT_ON_REALTIME_TRANSCRIBE_END","WhisperContext","constructor","id","transcribe","path","options","arguments","length","undefined","jobId","Math","floor","random","stop","abortTranscribe","promise","transcribeFile","transcribeRealtime","startRealtimeTranscribe","lastTranscribePayload","slices","sliceIndex","tOffset","putSlice","payload","isUseSlices","_slices$sliceIndex","_segments","segments","data","t1","map","segment","t0","mergeSlicesIfNeeded","mergedPayload","forEach","slice","_mergedPayload$data","_slice$data","_mergedPayload$data2","_slice$data2","result","processTime","recordingTime","subscribe","callback","transcribeListener","addListener","evt","contextId","endListener","lastPayload","isCapturing","remove","release","releaseContext","initWhisper","filePath","initContext","releaseAllWhisper","releaseAllContexts"],"sourceRoot":"../../src","sources":["index.tsx"],"mappings":"AAAA,SACEA,kBAAkB,EAClBC,kBAAkB,EAClBC,aAAa,EACbC,QAAQ,QAEH,cAAc;AAErB,MAAMC,aAAa,GAChB,sEAAqED,QAAQ,CAACE,MAAM,CAAC;EAAEC,GAAG,EAAE,gCAAgC;EAAEC,OAAO,EAAE;AAAG,CAAC,CAC3I,oDAAmD;AAEtD,MAAMC,SAAS,GAAGN,aAAa,CAACM,SAAS,GACrCN,aAAa,CAACM,SAAS,GACvB,IAAIC,KAAK,CACT,CAAC,CAAC,EACF;EACEC,GAAGA,CAAA,EAAG;IACJ,MAAM,IAAIC,KAAK,CAACP,aAAa,CAAC;EAChC;AACF,CAAC,CACF;AAEH,IAAIQ,YAA2D;AAC/D,IAAIT,QAAQ,CAACU,EAAE,KAAK,KAAK,EAAE;EACzBD,YAAY,GAAG,IAAIZ,kBAAkB,CAACQ,SAAS,CAAC;AAClD;AACA,IAAIL,QAAQ,CAACU,EAAE,KAAK,SAAS,EAAE;EAC7BD,YAAY,GAAGX,kBAAkB;AACnC;AAEA,MAAMa,4BAA4B,GAAG,iCAAiC;AACtE,MAAMC,gCAAgC,GAAG,oCAAoC;AAiG7E,MAAMC,cAAc,CAAC;EAGnBC,WAAWA,CAACC,EAAU,EAAE;IACtB,IAAI,CAACA,EAAE,GAAGA,EAAE;EACd;;EAEA;EACAC,UAAUA,CAACC,IAAY,EAKrB;IAAA,IALuBC,OAA0B,GAAAC,SAAA,CAAAC,MAAA,QAAAD,SAAA,QAAAE,SAAA,GAAAF,SAAA,MAAG,CAAC,CAAC;IAMtD,MAAMG,KAAa,GAAGC,IAAI,CAACC,KAAK,CAACD,IAAI,CAACE,MAAM,EAAE,GA
AG,KAAK,CAAC;IACvD,OAAO;MACLC,IAAI,EAAEA,CAAA,KAAMrB,SAAS,CAACsB,eAAe,CAAC,IAAI,CAACZ,EAAE,EAAEO,KAAK,CAAC;MACrDM,OAAO,EAAEvB,SAAS,CAACwB,cAAc,CAAC,IAAI,CAACd,EAAE,EAAEO,KAAK,EAAEL,IAAI,EAAEC,OAAO;IACjE,CAAC;EACH;;EAEA;EACA,MAAMY,kBAAkBA,CAAA,EAKrB;IAAA,IALsBZ,OAAkC,GAAAC,SAAA,CAAAC,MAAA,QAAAD,SAAA,QAAAE,SAAA,GAAAF,SAAA,MAAG,CAAC,CAAC;IAM9D,MAAMG,KAAa,GAAGC,IAAI,CAACC,KAAK,CAACD,IAAI,CAACE,MAAM,EAAE,GAAG,KAAK,CAAC;IACvD,MAAMpB,SAAS,CAAC0B,uBAAuB,CAAC,IAAI,CAAChB,EAAE,EAAEO,KAAK,EAAEJ,OAAO,CAAC;IAChE,IAAIc,qBAAsD;IAE1D,MAAMC,MAAyC,GAAG,EAAE;IACpD,IAAIC,UAAkB,GAAG,CAAC;IAC1B,IAAIC,OAAe,GAAG,CAAC;IAEvB,MAAMC,QAAQ,GAAIC,OAAwC,IAAK;MAC7D,IAAI,CAACA,OAAO,CAACC,WAAW,EAAE;MAC1B,IAAIJ,UAAU,KAAKG,OAAO,CAACH,UAAU,EAAE;QAAA,IAAAK,kBAAA,EAAAC,SAAA;QACrC,MAAM;UAAEC,QAAQ,GAAG;QAAG,CAAC,GAAG,EAAAF,kBAAA,GAAAN,MAAM,CAACC,UAAU,CAAC,cAAAK,kBAAA,uBAAlBA,kBAAA,CAAoBG,IAAI,KAAI,CAAC,CAAC;QACxDP,OAAO,GAAG,EAAAK,SAAA,GAAAC,QAAQ,CAACA,QAAQ,CAACrB,MAAM,GAAG,CAAC,CAAC,cAAAoB,SAAA,uBAA7BA,SAAA,CAA+BG,EAAE,KAAI,CAAC;MAClD;MACA,CAAC;QAAET;MAAW,CAAC,GAAGG,OAAO;MACzBJ,MAAM,CAACC,UAAU,CAAC,GAAG;QACnB,GAAGG,OAAO;QACVK,IAAI,EAAEL,OAAO,CAACK,IAAI,GAAG;UACnB,GAAGL,OAAO,CAACK,IAAI;UACfD,QAAQ,EAAEJ,OAAO,CAACK,IAAI,CAACD,QAAQ,CAACG,GAAG,CAAEC,OAAO,KAAM;YAChD,GAAGA,OAAO;YACVC,EAAE,EAAED,OAAO,CAACC,EAAE,GAAGX,OAAO;YACxBQ,EAAE,EAAEE,OAAO,CAACF,EAAE,GAAGR;UACnB,CAAC,CAAC,CAAC,IAAI;QACT,CAAC,GAAGd;MACN,CAAC;IACH,CAAC;IAED,MAAM0B,mBAAmB,GAAIV,OAAwC,IAAsC;MACzG,IAAI,CAACA,OAAO,CAACC,WAAW,EAAE,OAAOD,OAAO;MAExC,MAAMW,aAAkB,GAAG,CAAC,CAAC;MAC7Bf,MAAM,CAACgB,OAAO,CACXC,KAAK,IAAK;QAAA,IAAAC,mBAAA,EAAAC,WAAA,EAAAC,oBAAA,EAAAC,YAAA;QACTN,aAAa,CAACN,IAAI,GAAG;UACnBa,MAAM,EAAE,CAAC,EAAAJ,mBAAA,GAAAH,aAAa,CAACN,IAAI,cAAAS,mBAAA,uBAAlBA,mBAAA,CAAoBI,MAAM,KAAI,EAAE,KAAK,EAAAH,WAAA,GAAAF,KAAK,CAACR,IAAI,cAAAU,WAAA,uBAAVA,WAAA,CAAYG,MAAM,KAAI,EAAE,CAAC;UACvEd,QAAQ,EAAE,CACR,IAAI,CAAAO,aAAa,aAAbA,aAAa,wBAAAK,oBAAA,GAAbL,aAAa,CAAEN,IAAI,cAAAW,oBAAA,uBAAnBA,oBAAA,CAAqBZ,QAAQ,KAAI,EAAE,CAAC,EACxC,IAAI,EAAAa,YAAA,GAAAJ,KAAK,CAACR,
IAAI,cAAAY,YAAA,uBAAVA,YAAA,CAAYb,QAAQ,KAAI,EAAE,CAAC;QAEnC,CAAC;QACDO,aAAa,CAACQ,WAAW,GAAGN,KAAK,CAACM,WAAW;QAC7CR,aAAa,CAACS,aAAa,GAAG,CAAC,CAAAT,aAAa,aAAbA,aAAa,uBAAbA,aAAa,CAAES,aAAa,KAAI,CAAC,IAAIP,KAAK,CAACO,aAAa;MACzF,CAAC,CACF;MACD,OAAO;QAAE,GAAGpB,OAAO;QAAE,GAAGW,aAAa;QAAEf;MAAO,CAAC;IACjD,CAAC;IAED,OAAO;MACLP,IAAI,EAAEA,CAAA,KAAMrB,SAAS,CAACsB,eAAe,CAAC,IAAI,CAACZ,EAAE,EAAEO,KAAK,CAAC;MACrDoC,SAAS,EAAGC,QAAkD,IAAK;QACjE,MAAMC,kBAAkB,GAAGnD,YAAY,CAACoD,WAAW,CACjDlD,4BAA4B,EAC3BmD,GAAkC,IAAK;UACtC,MAAM;YAAEC,SAAS;YAAE1B;UAAQ,CAAC,GAAGyB,GAAG;UAClC,IAAIC,SAAS,KAAK,IAAI,CAAChD,EAAE,IAAI+C,GAAG,CAACxC,KAAK,KAAKA,KAAK,EAAE;UAClDU,qBAAqB,GAAGK,OAAO;UAC/BD,QAAQ,CAACC,OAAO,CAAC;UACjBsB,QAAQ,CAAC;YACPI,SAAS;YACTzC,KAAK,EAAEwC,GAAG,CAACxC,KAAK;YAChB,GAAGyB,mBAAmB,CAACV,OAAO;UAChC,CAAC,CAAC;QACJ,CAAC,CACF;QACD,MAAM2B,WAAW,GAAGvD,YAAY,CAACoD,WAAW,CAC1CjD,gCAAgC,EAC/BkD,GAAkC,IAAK;UACtC,MAAM;YAAEC,SAAS;YAAE1B;UAAQ,CAAC,GAAGyB,GAAG;UAClC,IAAIC,SAAS,KAAK,IAAI,CAAChD,EAAE,IAAI+C,GAAG,CAACxC,KAAK,KAAKA,KAAK,EAAE;UAClD,MAAM2C,WAAW,GAAG;YAClB,GAAGjC,qBAAqB;YACxB,GAAGK;UACL,CAAC;UACDD,QAAQ,CAAC6B,WAAW,CAAC;UACrBN,QAAQ,CAAC;YACPI,SAAS;YACTzC,KAAK,EAAEwC,GAAG,CAACxC,KAAK;YAChB,GAAGyB,mBAAmB,CAACkB,WAAW,CAAC;YACnCC,WAAW,EAAE;UACf,CAAC,CAAC;UACFN,kBAAkB,CAACO,MAAM,EAAE;UAC3BH,WAAW,CAACG,MAAM,EAAE;QACtB,CAAC,CACF;MACH;IACF,CAAC;EACH;EAEA,MAAMC,OAAOA,CAAA,EAAG;IACd,OAAO/D,SAAS,CAACgE,cAAc,CAAC,IAAI,CAACtD,EAAE,CAAC;EAC1C;AACF;AAEA,OAAO,eAAeuD,WAAWA,CAAA,EAEN;EAAA,IADzB;IAAEC;EAAgC,CAAC,GAAApD,SAAA,CAAAC,MAAA,QAAAD,SAAA,QAAAE,SAAA,GAAAF,SAAA,MAAG,CAAC,CAAC;EAExC,MAAMJ,EAAE,GAAG,MAAMV,SAAS,CAACmE,WAAW,CAACD,QAAQ,CAAC;EAChD,OAAO,IAAI1D,cAAc,CAACE,EAAE,CAAC;AAC/B;AAEA,OAAO,eAAe0D,iBAAiBA,CAAA,EAAkB;EACvD,OAAOpE,SAAS,CAACqE,kBAAkB,EAAE;AACvC"}
|
|
@@ -36,6 +36,12 @@ export type TranscribeRealtimeOptions = TranscribeOptions & {
|
|
|
36
36
|
* the recommended value will be <= 30 seconds. (Default: 30)
|
|
37
37
|
*/
|
|
38
38
|
realtimeAudioSec?: number;
|
|
39
|
+
/**
|
|
40
|
+
* Optimize audio transcription performance by slicing audio samples when `realtimeAudioSec` > 30.
|
|
41
|
+
* Set `realtimeAudioSliceSec` < 30 so that performance improvements can be achieved within Whisper's hard constraint (it processes audio in chunks of 30 seconds).
|
|
42
|
+
* (Default: Equal to `realtimeAudioSec`)
|
|
43
|
+
*/
|
|
44
|
+
realtimeAudioSliceSec?: number;
|
|
39
45
|
};
|
|
40
46
|
export type TranscribeResult = {
|
|
41
47
|
result: string;
|
|
@@ -50,24 +56,36 @@ export type TranscribeRealtimeEvent = {
|
|
|
50
56
|
jobId: number;
|
|
51
57
|
/** Is capturing audio, when false, the event is the final result */
|
|
52
58
|
isCapturing: boolean;
|
|
59
|
+
isStoppedByAction?: boolean;
|
|
60
|
+
code: number;
|
|
61
|
+
data?: TranscribeResult;
|
|
62
|
+
error?: string;
|
|
63
|
+
processTime: number;
|
|
64
|
+
recordingTime: number;
|
|
65
|
+
slices?: Array<{
|
|
66
|
+
code: number;
|
|
67
|
+
error?: string;
|
|
68
|
+
data?: TranscribeResult;
|
|
69
|
+
processTime: number;
|
|
70
|
+
recordingTime: number;
|
|
71
|
+
}>;
|
|
72
|
+
};
|
|
73
|
+
export type TranscribeRealtimeNativePayload = {
|
|
74
|
+
/** Is capturing audio, when false, the event is the final result */
|
|
75
|
+
isCapturing: boolean;
|
|
76
|
+
isStoppedByAction?: boolean;
|
|
53
77
|
code: number;
|
|
54
78
|
processTime: number;
|
|
55
79
|
recordingTime: number;
|
|
80
|
+
isUseSlices: boolean;
|
|
81
|
+
sliceIndex: number;
|
|
56
82
|
data?: TranscribeResult;
|
|
57
83
|
error?: string;
|
|
58
84
|
};
|
|
59
85
|
export type TranscribeRealtimeNativeEvent = {
|
|
60
86
|
contextId: number;
|
|
61
87
|
jobId: number;
|
|
62
|
-
payload:
|
|
63
|
-
/** Is capturing audio, when false, the event is the final result */
|
|
64
|
-
isCapturing: boolean;
|
|
65
|
-
code: number;
|
|
66
|
-
processTime: number;
|
|
67
|
-
recordingTime: number;
|
|
68
|
-
data?: TranscribeResult;
|
|
69
|
-
error?: string;
|
|
70
|
-
};
|
|
88
|
+
payload: TranscribeRealtimeNativePayload;
|
|
71
89
|
};
|
|
72
90
|
declare class WhisperContext {
|
|
73
91
|
id: number;
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/index.tsx"],"names":[],"mappings":"AAkCA,MAAM,MAAM,iBAAiB,GAAG;IAC9B,wDAAwD;IACxD,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,iEAAiE;IACjE,SAAS,CAAC,EAAE,OAAO,CAAC;IACpB,+DAA+D;IAC/D,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,qDAAqD;IACrD,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,2CAA2C;IAC3C,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,oCAAoC;IACpC,eAAe,CAAC,EAAE,OAAO,CAAC;IAC1B,2CAA2C;IAC3C,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,kCAAkC;IAClC,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,mDAAmD;IACnD,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,mCAAmC;IACnC,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,cAAc,CAAC,EAAE,MAAM,CAAC;IACxB,gCAAgC;IAChC,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,wCAAwC;IACxC,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,8CAA8C;IAC9C,OAAO,CAAC,EAAE,OAAO,CAAC;IAClB,qBAAqB;IACrB,MAAM,CAAC,EAAE,MAAM,CAAC;CACjB,CAAA;AAED,MAAM,MAAM,yBAAyB,GAAG,iBAAiB,GAAG;IAC1D;;;;OAIG;IACH,gBAAgB,CAAC,EAAE,MAAM,CAAC;
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/index.tsx"],"names":[],"mappings":"AAkCA,MAAM,MAAM,iBAAiB,GAAG;IAC9B,wDAAwD;IACxD,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,iEAAiE;IACjE,SAAS,CAAC,EAAE,OAAO,CAAC;IACpB,+DAA+D;IAC/D,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,qDAAqD;IACrD,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,2CAA2C;IAC3C,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,oCAAoC;IACpC,eAAe,CAAC,EAAE,OAAO,CAAC;IAC1B,2CAA2C;IAC3C,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,kCAAkC;IAClC,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,mDAAmD;IACnD,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,mCAAmC;IACnC,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,cAAc,CAAC,EAAE,MAAM,CAAC;IACxB,gCAAgC;IAChC,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,wCAAwC;IACxC,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,8CAA8C;IAC9C,OAAO,CAAC,EAAE,OAAO,CAAC;IAClB,qBAAqB;IACrB,MAAM,CAAC,EAAE,MAAM,CAAC;CACjB,CAAA;AAED,MAAM,MAAM,yBAAyB,GAAG,iBAAiB,GAAG;IAC1D;;;;OAIG;IACH,gBAAgB,CAAC,EAAE,MAAM,CAAC;IAC1B;;;;OAIG;IACH,qBAAqB,CAAC,EAAE,MAAM,CAAA;CAC/B,CAAA;AAED,MAAM,MAAM,gBAAgB,GAAG;IAC7B,MAAM,EAAE,MAAM,CAAC;IACf,QAAQ,EAAE,KAAK,CAAC;QACd,IAAI,EAAE,MAAM,CAAC;QACb,EAAE,EAAE,MAAM,CAAC;QACX,EAAE,EAAE,MAAM,CAAC;KACZ,CAAC,CAAC;CACJ,CAAA;AAED,MAAM,MAAM,uBAAuB,GAAG;IACpC,SAAS,EAAE,MAAM,CAAC;IAClB,KAAK,EAAE,MAAM,CAAC;IACd,oEAAoE;IACpE,WAAW,EAAE,OAAO,CAAC;IACrB,iBAAiB,CAAC,EAAE,OAAO,CAAC;IAC5B,IAAI,EAAE,MAAM,CAAC;IACb,IAAI,CAAC,EAAE,gBAAgB,CAAC;IACxB,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,WAAW,EAAE,MAAM,CAAC;IACpB,aAAa,EAAE,MAAM,CAAC;IACtB,MAAM,CAAC,EAAE,KAAK,CAAC;QACb,IAAI,EAAE,MAAM,CAAC;QACb,KAAK,CAAC,EAAE,MAAM,CAAC;QACf,IAAI,CAAC,EAAE,gBAAgB,CAAC;QACxB,WAAW,EAAE,MAAM,CAAC;QACpB,aAAa,EAAE,MAAM,CAAC;KACvB,CAAC,CAAC;CACJ,CAAA;AAED,MAAM,MAAM,+BAA+B,GAAG;IAC5C,oEAAoE;IACpE,WAAW,EAAE,OAAO,CAAC;IACrB,iBAAiB,CAAC,EAAE,OAAO,CAAC;IAC5B,IAAI,EAAE,MAAM,CAAC;IACb,WAAW,EAAE,MAAM,CAAC;IACpB,aAAa,EAAE,MAAM,CAAC;IACtB,WAAW,EAAE,OAAO,CAAC;IACrB,UAAU,EAAE,MAAM,CAAC;IACnB,IAAI,CAAC,EAAE,gBAAgB,CAAC;IACxB,KAAK,CAAC,EAAE,MAAM,CAAC;CAChB,CAAA;AAED,MAAM,MAAM,6BAA6B,GAAG;IAC1C,SAAS,EAAE,MAAM,CAAC;IAClB,KAAK,EAAE,MAAM,CAAC;IACd,OAAO,EAAE,+BAA+
B,CAAC;CAC1C,CAAA;AAED,cAAM,cAAc;IAClB,EAAE,EAAE,MAAM,CAAA;gBAEE,EAAE,EAAE,MAAM;IAItB,4BAA4B;IAC5B,UAAU,CAAC,IAAI,EAAE,MAAM,EAAE,OAAO,GAAE,iBAAsB,GAAG;QACzD,0BAA0B;QAC1B,IAAI,EAAE,MAAM,IAAI,CAAC;QACjB,gCAAgC;QAChC,OAAO,EAAE,OAAO,CAAC,gBAAgB,CAAC,CAAC;KACpC;IAQD,yFAAyF;IACnF,kBAAkB,CAAC,OAAO,GAAE,yBAA8B,GAAG,OAAO,CAAC;QACzE,mCAAmC;QACnC,IAAI,EAAE,MAAM,IAAI,CAAC;QACjB,8CAA8C;QAC9C,SAAS,EAAE,CAAC,QAAQ,EAAE,CAAC,KAAK,EAAE,uBAAuB,KAAK,IAAI,KAAK,IAAI,CAAC;KACzE,CAAC;IA0FI,OAAO;CAGd;AAED,wBAAsB,WAAW,CAC/B,EAAE,QAAQ,EAAE,GAAE;IAAE,QAAQ,CAAC,EAAE,MAAM,CAAA;CAAO,GACvC,OAAO,CAAC,cAAc,CAAC,CAGzB;AAED,wBAAsB,iBAAiB,IAAI,OAAO,CAAC,IAAI,CAAC,CAEvD"}
|
package/package.json
CHANGED
package/src/index.tsx
CHANGED
|
@@ -71,6 +71,12 @@ export type TranscribeRealtimeOptions = TranscribeOptions & {
|
|
|
71
71
|
* the recommended value will be <= 30 seconds. (Default: 30)
|
|
72
72
|
*/
|
|
73
73
|
realtimeAudioSec?: number,
|
|
74
|
+
/**
|
|
75
|
+
* Optimize audio transcription performance by slicing audio samples when `realtimeAudioSec` > 30.
|
|
76
|
+
* Set `realtimeAudioSliceSec` < 30 so that performance improvements can be achieved within Whisper's hard constraint (it processes audio in chunks of 30 seconds).
|
|
77
|
+
* (Default: Equal to `realtimeAudioSec`)
|
|
78
|
+
*/
|
|
79
|
+
realtimeAudioSliceSec?: number
|
|
74
80
|
}
|
|
75
81
|
|
|
76
82
|
export type TranscribeResult = {
|
|
@@ -87,9 +93,30 @@ export type TranscribeRealtimeEvent = {
|
|
|
87
93
|
jobId: number,
|
|
88
94
|
/** Is capturing audio, when false, the event is the final result */
|
|
89
95
|
isCapturing: boolean,
|
|
96
|
+
isStoppedByAction?: boolean,
|
|
90
97
|
code: number,
|
|
98
|
+
data?: TranscribeResult,
|
|
99
|
+
error?: string,
|
|
91
100
|
processTime: number,
|
|
92
101
|
recordingTime: number,
|
|
102
|
+
slices?: Array<{
|
|
103
|
+
code: number,
|
|
104
|
+
error?: string,
|
|
105
|
+
data?: TranscribeResult,
|
|
106
|
+
processTime: number,
|
|
107
|
+
recordingTime: number,
|
|
108
|
+
}>,
|
|
109
|
+
}
|
|
110
|
+
|
|
111
|
+
export type TranscribeRealtimeNativePayload = {
|
|
112
|
+
/** Is capturing audio, when false, the event is the final result */
|
|
113
|
+
isCapturing: boolean,
|
|
114
|
+
isStoppedByAction?: boolean,
|
|
115
|
+
code: number,
|
|
116
|
+
processTime: number,
|
|
117
|
+
recordingTime: number,
|
|
118
|
+
isUseSlices: boolean,
|
|
119
|
+
sliceIndex: number,
|
|
93
120
|
data?: TranscribeResult,
|
|
94
121
|
error?: string,
|
|
95
122
|
}
|
|
@@ -97,15 +124,7 @@ export type TranscribeRealtimeEvent = {
|
|
|
97
124
|
export type TranscribeRealtimeNativeEvent = {
|
|
98
125
|
contextId: number,
|
|
99
126
|
jobId: number,
|
|
100
|
-
payload:
|
|
101
|
-
/** Is capturing audio, when false, the event is the final result */
|
|
102
|
-
isCapturing: boolean,
|
|
103
|
-
code: number,
|
|
104
|
-
processTime: number,
|
|
105
|
-
recordingTime: number,
|
|
106
|
-
data?: TranscribeResult,
|
|
107
|
-
error?: string,
|
|
108
|
-
},
|
|
127
|
+
payload: TranscribeRealtimeNativePayload,
|
|
109
128
|
}
|
|
110
129
|
|
|
111
130
|
class WhisperContext {
|
|
@@ -138,9 +157,52 @@ class WhisperContext {
|
|
|
138
157
|
}> {
|
|
139
158
|
const jobId: number = Math.floor(Math.random() * 10000)
|
|
140
159
|
await RNWhisper.startRealtimeTranscribe(this.id, jobId, options)
|
|
141
|
-
let
|
|
142
|
-
|
|
143
|
-
|
|
160
|
+
let lastTranscribePayload: TranscribeRealtimeNativePayload
|
|
161
|
+
|
|
162
|
+
const slices: TranscribeRealtimeNativePayload[] = []
|
|
163
|
+
let sliceIndex: number = 0
|
|
164
|
+
let tOffset: number = 0
|
|
165
|
+
|
|
166
|
+
const putSlice = (payload: TranscribeRealtimeNativePayload) => {
|
|
167
|
+
if (!payload.isUseSlices) return
|
|
168
|
+
if (sliceIndex !== payload.sliceIndex) {
|
|
169
|
+
const { segments = [] } = slices[sliceIndex]?.data || {}
|
|
170
|
+
tOffset = segments[segments.length - 1]?.t1 || 0
|
|
171
|
+
}
|
|
172
|
+
({ sliceIndex } = payload)
|
|
173
|
+
slices[sliceIndex] = {
|
|
174
|
+
...payload,
|
|
175
|
+
data: payload.data ? {
|
|
176
|
+
...payload.data,
|
|
177
|
+
segments: payload.data.segments.map((segment) => ({
|
|
178
|
+
...segment,
|
|
179
|
+
t0: segment.t0 + tOffset,
|
|
180
|
+
t1: segment.t1 + tOffset,
|
|
181
|
+
})) || [],
|
|
182
|
+
} : undefined,
|
|
183
|
+
}
|
|
184
|
+
}
|
|
185
|
+
|
|
186
|
+
const mergeSlicesIfNeeded = (payload: TranscribeRealtimeNativePayload): TranscribeRealtimeNativePayload => {
|
|
187
|
+
if (!payload.isUseSlices) return payload
|
|
188
|
+
|
|
189
|
+
const mergedPayload: any = {}
|
|
190
|
+
slices.forEach(
|
|
191
|
+
(slice) => {
|
|
192
|
+
mergedPayload.data = {
|
|
193
|
+
result: (mergedPayload.data?.result || '') + (slice.data?.result || ''),
|
|
194
|
+
segments: [
|
|
195
|
+
...(mergedPayload?.data?.segments || []),
|
|
196
|
+
...(slice.data?.segments || []),
|
|
197
|
+
],
|
|
198
|
+
}
|
|
199
|
+
mergedPayload.processTime = slice.processTime
|
|
200
|
+
mergedPayload.recordingTime = (mergedPayload?.recordingTime || 0) + slice.recordingTime
|
|
201
|
+
}
|
|
202
|
+
)
|
|
203
|
+
return { ...payload, ...mergedPayload, slices }
|
|
204
|
+
}
|
|
205
|
+
|
|
144
206
|
return {
|
|
145
207
|
stop: () => RNWhisper.abortTranscribe(this.id, jobId),
|
|
146
208
|
subscribe: (callback: (event: TranscribeRealtimeEvent) => void) => {
|
|
@@ -150,22 +212,34 @@ class WhisperContext {
|
|
|
150
212
|
const { contextId, payload } = evt
|
|
151
213
|
if (contextId !== this.id || evt.jobId !== jobId) return
|
|
152
214
|
lastTranscribePayload = payload
|
|
153
|
-
|
|
154
|
-
|
|
215
|
+
putSlice(payload)
|
|
216
|
+
callback({
|
|
217
|
+
contextId,
|
|
218
|
+
jobId: evt.jobId,
|
|
219
|
+
...mergeSlicesIfNeeded(payload),
|
|
220
|
+
})
|
|
155
221
|
}
|
|
156
222
|
)
|
|
157
|
-
removeTranscribe = transcribeListener.remove
|
|
158
223
|
const endListener = EventEmitter.addListener(
|
|
159
224
|
EVENT_ON_REALTIME_TRANSCRIBE_END,
|
|
160
225
|
(evt: TranscribeRealtimeNativeEvent) => {
|
|
161
|
-
const { contextId } = evt
|
|
226
|
+
const { contextId, payload } = evt
|
|
162
227
|
if (contextId !== this.id || evt.jobId !== jobId) return
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
228
|
+
const lastPayload = {
|
|
229
|
+
...lastTranscribePayload,
|
|
230
|
+
...payload,
|
|
231
|
+
}
|
|
232
|
+
putSlice(lastPayload)
|
|
233
|
+
callback({
|
|
234
|
+
contextId,
|
|
235
|
+
jobId: evt.jobId,
|
|
236
|
+
...mergeSlicesIfNeeded(lastPayload),
|
|
237
|
+
isCapturing: false
|
|
238
|
+
})
|
|
239
|
+
transcribeListener.remove()
|
|
240
|
+
endListener.remove()
|
|
166
241
|
}
|
|
167
242
|
)
|
|
168
|
-
removeEnd = endListener.remove
|
|
169
243
|
},
|
|
170
244
|
}
|
|
171
245
|
}
|