whisper.rn 0.4.0-rc.1 → 0.4.0-rc.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/ios/RNWhisper.xcodeproj/project.xcworkspace/contents.xcworkspacedata +4 -0
- package/ios/RNWhisper.xcodeproj/project.xcworkspace/xcshareddata/IDEWorkspaceChecks.plist +8 -0
- package/ios/RNWhisper.xcodeproj/project.xcworkspace/xcuserdata/jhen.xcuserdatad/UserInterfaceState.xcuserstate +0 -0
- package/ios/RNWhisper.xcodeproj/xcuserdata/jhen.xcuserdatad/xcschemes/xcschememanagement.plist +19 -0
- package/ios/RNWhisperContext.h +6 -0
- package/ios/RNWhisperContext.mm +16 -8
- package/package.json +1 -1
|
Binary file
|
package/ios/RNWhisper.xcodeproj/xcuserdata/jhen.xcuserdatad/xcschemes/xcschememanagement.plist
ADDED
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
<?xml version="1.0" encoding="UTF-8"?>
|
|
2
|
+
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
|
|
3
|
+
<plist version="1.0">
|
|
4
|
+
<dict>
|
|
5
|
+
<key>SchemeUserState</key>
|
|
6
|
+
<dict>
|
|
7
|
+
<key>RNWhisper.xcscheme_^#shared#^_</key>
|
|
8
|
+
<dict>
|
|
9
|
+
<key>orderHint</key>
|
|
10
|
+
<integer>0</integer>
|
|
11
|
+
</dict>
|
|
12
|
+
<key>WhisperCpp.xcscheme_^#shared#^_</key>
|
|
13
|
+
<dict>
|
|
14
|
+
<key>orderHint</key>
|
|
15
|
+
<integer>0</integer>
|
|
16
|
+
</dict>
|
|
17
|
+
</dict>
|
|
18
|
+
</dict>
|
|
19
|
+
</plist>
|
package/ios/RNWhisperContext.h
CHANGED
|
@@ -27,6 +27,12 @@ typedef struct {
|
|
|
27
27
|
int sliceIndex;
|
|
28
28
|
int transcribeSliceIndex;
|
|
29
29
|
int audioSliceSec;
|
|
30
|
+
NSString* audioOutputPath;
|
|
31
|
+
|
|
32
|
+
bool useVad;
|
|
33
|
+
int vadSec;
|
|
34
|
+
float vadThold;
|
|
35
|
+
float vadFreqThold;
|
|
30
36
|
|
|
31
37
|
AudioQueueRef queue;
|
|
32
38
|
AudioStreamBasicDescription dataFormat;
|
package/ios/RNWhisperContext.mm
CHANGED
|
@@ -53,6 +53,13 @@
|
|
|
53
53
|
int realtimeAudioSliceSec = options[@"realtimeAudioSliceSec"] != nil ? [options[@"realtimeAudioSliceSec"] intValue] : 0;
|
|
54
54
|
int audioSliceSec = realtimeAudioSliceSec > 0 && realtimeAudioSliceSec < maxAudioSec ? realtimeAudioSliceSec : maxAudioSec;
|
|
55
55
|
|
|
56
|
+
self->recordState.audioOutputPath = options[@"audioOutputPath"];
|
|
57
|
+
|
|
58
|
+
self->recordState.useVad = options[@"useVad"] != nil ? [options[@"useVad"] boolValue] : false;
|
|
59
|
+
self->recordState.vadSec = options[@"vadMs"] != nil ? [options[@"vadMs"] intValue] / 1000 : 2;
|
|
60
|
+
self->recordState.vadThold = options[@"vadThold"] != nil ? [options[@"vadThold"] floatValue] : 0.6f;
|
|
61
|
+
self->recordState.vadFreqThold = options[@"vadFreqThold"] != nil ? [options[@"vadFreqThold"] floatValue] : 100.0f;
|
|
62
|
+
|
|
56
63
|
self->recordState.audioSliceSec = audioSliceSec;
|
|
57
64
|
self->recordState.isUseSlices = audioSliceSec < maxAudioSec;
|
|
58
65
|
|
|
@@ -90,18 +97,15 @@
|
|
|
90
97
|
bool vad(RNWhisperContextRecordState *state, int16_t* audioBufferI16, int nSamples, int n)
|
|
91
98
|
{
|
|
92
99
|
bool isSpeech = true;
|
|
93
|
-
if (!state->isTranscribing && state->
|
|
94
|
-
int
|
|
95
|
-
int sampleSize = vadSec * WHISPER_SAMPLE_RATE;
|
|
100
|
+
if (!state->isTranscribing && state->useVad) {
|
|
101
|
+
int sampleSize = state->vadSec * WHISPER_SAMPLE_RATE;
|
|
96
102
|
if (nSamples + n > sampleSize) {
|
|
97
103
|
int start = nSamples + n - sampleSize;
|
|
98
104
|
std::vector<float> audioBufferF32Vec(sampleSize);
|
|
99
105
|
for (int i = 0; i < sampleSize; i++) {
|
|
100
106
|
audioBufferF32Vec[i] = (float)audioBufferI16[i + start] / 32768.0f;
|
|
101
107
|
}
|
|
102
|
-
|
|
103
|
-
float vadFreqThold = state->options[@"vadFreqThold"] != nil ? [state->options[@"vadFreqThold"] floatValue] : 100.0f;
|
|
104
|
-
isSpeech = rn_whisper_vad_simple(audioBufferF32Vec, WHISPER_SAMPLE_RATE, 1000, vadThold, vadFreqThold, false);
|
|
108
|
+
isSpeech = rn_whisper_vad_simple(audioBufferF32Vec, WHISPER_SAMPLE_RATE, 1000, state->vadThold, state->vadFreqThold, false);
|
|
105
109
|
NSLog(@"[RNWhisper] VAD result: %d", isSpeech);
|
|
106
110
|
} else {
|
|
107
111
|
isSpeech = false;
|
|
@@ -257,12 +261,12 @@ void AudioInputCallback(void * inUserData,
|
|
|
257
261
|
result[@"isCapturing"] = @(false);
|
|
258
262
|
|
|
259
263
|
// Save wav if needed
|
|
260
|
-
if (state->
|
|
264
|
+
if (state->audioOutputPath != nil) {
|
|
261
265
|
// TODO: Append in real time so we don't need to keep all slices & also reduce memory usage
|
|
262
266
|
[RNWhisperAudioUtils
|
|
263
267
|
saveWavFile:[RNWhisperAudioUtils concatShortBuffers:state->shortBufferSlices
|
|
264
268
|
sliceNSamples:state->sliceNSamples]
|
|
265
|
-
audioOutputFile:state->
|
|
269
|
+
audioOutputFile:state->audioOutputPath
|
|
266
270
|
];
|
|
267
271
|
}
|
|
268
272
|
|
|
@@ -408,6 +412,10 @@ struct rnwhisper_segments_callback_data {
|
|
|
408
412
|
rn_whisper_abort_transcribe(jobId);
|
|
409
413
|
if (self->recordState.isRealtime && self->recordState.isCapturing) {
|
|
410
414
|
[self stopAudio];
|
|
415
|
+
if (!self->recordState.isTranscribing) {
|
|
416
|
+
// Handle for VAD case
|
|
417
|
+
self->recordState.transcribeHandler(jobId, @"end", @{});
|
|
418
|
+
}
|
|
411
419
|
}
|
|
412
420
|
self->recordState.isCapturing = false;
|
|
413
421
|
self->recordState.isStoppedByAction = true;
|