whisper.rn 0.4.0-rc.1 → 0.4.0-rc.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,4 @@
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <Workspace
3
+ version = "1.0">
4
+ </Workspace>
@@ -0,0 +1,8 @@
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
3
+ <plist version="1.0">
4
+ <dict>
5
+ <key>IDEDidComputeMac32BitWarning</key>
6
+ <true/>
7
+ </dict>
8
+ </plist>
@@ -0,0 +1,19 @@
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
3
+ <plist version="1.0">
4
+ <dict>
5
+ <key>SchemeUserState</key>
6
+ <dict>
7
+ <key>RNWhisper.xcscheme_^#shared#^_</key>
8
+ <dict>
9
+ <key>orderHint</key>
10
+ <integer>0</integer>
11
+ </dict>
12
+ <key>WhisperCpp.xcscheme_^#shared#^_</key>
13
+ <dict>
14
+ <key>orderHint</key>
15
+ <integer>0</integer>
16
+ </dict>
17
+ </dict>
18
+ </dict>
19
+ </plist>
@@ -27,6 +27,12 @@ typedef struct {
27
27
  int sliceIndex;
28
28
  int transcribeSliceIndex;
29
29
  int audioSliceSec;
30
+ NSString* audioOutputPath;
31
+
32
+ bool useVad;
33
+ int vadSec;
34
+ float vadThold;
35
+ float vadFreqThold;
30
36
 
31
37
  AudioQueueRef queue;
32
38
  AudioStreamBasicDescription dataFormat;
@@ -53,6 +53,13 @@
53
53
  int realtimeAudioSliceSec = options[@"realtimeAudioSliceSec"] != nil ? [options[@"realtimeAudioSliceSec"] intValue] : 0;
54
54
  int audioSliceSec = realtimeAudioSliceSec > 0 && realtimeAudioSliceSec < maxAudioSec ? realtimeAudioSliceSec : maxAudioSec;
55
55
 
56
+ self->recordState.audioOutputPath = options[@"audioOutputPath"];
57
+
58
+ self->recordState.useVad = options[@"useVad"] != nil ? [options[@"useVad"] boolValue] : false;
59
+ self->recordState.vadSec = options[@"vadMs"] != nil ? [options[@"vadMs"] intValue] / 1000 : 2;
60
+ self->recordState.vadThold = options[@"vadThold"] != nil ? [options[@"vadThold"] floatValue] : 0.6f;
61
+ self->recordState.vadFreqThold = options[@"vadFreqThold"] != nil ? [options[@"vadFreqThold"] floatValue] : 100.0f;
62
+
56
63
  self->recordState.audioSliceSec = audioSliceSec;
57
64
  self->recordState.isUseSlices = audioSliceSec < maxAudioSec;
58
65
 
@@ -90,18 +97,15 @@
90
97
  bool vad(RNWhisperContextRecordState *state, int16_t* audioBufferI16, int nSamples, int n)
91
98
  {
92
99
  bool isSpeech = true;
93
- if (!state->isTranscribing && state->options[@"useVad"]) {
94
- int vadSec = state->options[@"vadMs"] != nil ? [state->options[@"vadMs"] intValue] / 1000 : 2;
95
- int sampleSize = vadSec * WHISPER_SAMPLE_RATE;
100
+ if (!state->isTranscribing && state->useVad) {
101
+ int sampleSize = state->vadSec * WHISPER_SAMPLE_RATE;
96
102
  if (nSamples + n > sampleSize) {
97
103
  int start = nSamples + n - sampleSize;
98
104
  std::vector<float> audioBufferF32Vec(sampleSize);
99
105
  for (int i = 0; i < sampleSize; i++) {
100
106
  audioBufferF32Vec[i] = (float)audioBufferI16[i + start] / 32768.0f;
101
107
  }
102
- float vadThold = state->options[@"vadThold"] != nil ? [state->options[@"vadThold"] floatValue] : 0.6f;
103
- float vadFreqThold = state->options[@"vadFreqThold"] != nil ? [state->options[@"vadFreqThold"] floatValue] : 100.0f;
104
- isSpeech = rn_whisper_vad_simple(audioBufferF32Vec, WHISPER_SAMPLE_RATE, 1000, vadThold, vadFreqThold, false);
108
+ isSpeech = rn_whisper_vad_simple(audioBufferF32Vec, WHISPER_SAMPLE_RATE, 1000, state->vadThold, state->vadFreqThold, false);
105
109
  NSLog(@"[RNWhisper] VAD result: %d", isSpeech);
106
110
  } else {
107
111
  isSpeech = false;
@@ -257,12 +261,12 @@ void AudioInputCallback(void * inUserData,
257
261
  result[@"isCapturing"] = @(false);
258
262
 
259
263
  // Save wav if needed
260
- if (state->options[@"audioOutputPath"] != nil) {
264
+ if (state->audioOutputPath != nil) {
261
265
  // TODO: Append in real time so we don't need to keep all slices & also reduce memory usage
262
266
  [RNWhisperAudioUtils
263
267
  saveWavFile:[RNWhisperAudioUtils concatShortBuffers:state->shortBufferSlices
264
268
  sliceNSamples:state->sliceNSamples]
265
- audioOutputFile:state->options[@"audioOutputPath"]
269
+ audioOutputFile:state->audioOutputPath
266
270
  ];
267
271
  }
268
272
 
@@ -408,6 +412,10 @@ struct rnwhisper_segments_callback_data {
408
412
  rn_whisper_abort_transcribe(jobId);
409
413
  if (self->recordState.isRealtime && self->recordState.isCapturing) {
410
414
  [self stopAudio];
415
+ if (!self->recordState.isTranscribing) {
416
+ // Handle for VAD case
417
+ self->recordState.transcribeHandler(jobId, @"end", @{});
418
+ }
411
419
  }
412
420
  self->recordState.isCapturing = false;
413
421
  self->recordState.isStoppedByAction = true;
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "whisper.rn",
3
- "version": "0.4.0-rc.1",
3
+ "version": "0.4.0-rc.2",
4
4
  "description": "React Native binding of whisper.cpp",
5
5
  "main": "lib/commonjs/index",
6
6
  "module": "lib/module/index",