npm - whisper.rn - Versions diffs - 0.4.0-rc.1 → 0.4.0-rc.2 - Mend

whisper.rn 0.4.0-rc.1 → 0.4.0-rc.2

This diff compares the contents of two publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the package versions as they appear in their respective public registries.

Files changed (7) hide show

package/ios/RNWhisper.xcodeproj/project.xcworkspace/contents.xcworkspacedata ADDED Viewed

@@ -0,0 +1,4 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<Workspace
+   version = "1.0">
+</Workspace>

package/ios/RNWhisper.xcodeproj/project.xcworkspace/xcshareddata/IDEWorkspaceChecks.plist ADDED Viewed

@@ -0,0 +1,8 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
+<plist version="1.0">
+<dict>
+	<key>IDEDidComputeMac32BitWarning</key>
+	<true/>
+</dict>
+</plist>

package/ios/RNWhisper.xcodeproj/project.xcworkspace/xcuserdata/jhen.xcuserdatad/UserInterfaceState.xcuserstate ADDED Viewed

Binary file

package/ios/RNWhisper.xcodeproj/xcuserdata/jhen.xcuserdatad/xcschemes/xcschememanagement.plist ADDED Viewed

@@ -0,0 +1,19 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
+<plist version="1.0">
+<dict>
+	<key>SchemeUserState</key>
+	<dict>
+		<key>RNWhisper.xcscheme_^#shared#^_</key>
+		<dict>
+			<key>orderHint</key>
+			<integer>0</integer>
+		</dict>
+		<key>WhisperCpp.xcscheme_^#shared#^_</key>
+		<dict>
+			<key>orderHint</key>
+			<integer>0</integer>
+		</dict>
+	</dict>
+</dict>
+</plist>

package/ios/RNWhisperContext.h CHANGED Viewed

@@ -27,6 +27,12 @@ typedef struct {
     int sliceIndex;
     int transcribeSliceIndex;
     int audioSliceSec;
+    NSString* audioOutputPath;
+    bool useVad;
+    int vadSec;
+    float vadThold;
+    float vadFreqThold;
     AudioQueueRef queue;
     AudioStreamBasicDescription dataFormat;

package/ios/RNWhisperContext.mm CHANGED Viewed

@@ -53,6 +53,13 @@
     int realtimeAudioSliceSec = options[@"realtimeAudioSliceSec"] != nil ? [options[@"realtimeAudioSliceSec"] intValue] : 0;
     int audioSliceSec = realtimeAudioSliceSec > 0 && realtimeAudioSliceSec < maxAudioSec ? realtimeAudioSliceSec : maxAudioSec;
+    self->recordState.audioOutputPath = options[@"audioOutputPath"];
+    self->recordState.useVad = options[@"useVad"] != nil ? [options[@"useVad"] boolValue] : false;
+    self->recordState.vadSec = options[@"vadMs"] != nil ? [options[@"vadMs"] intValue] / 1000 : 2;
+    self->recordState.vadThold = options[@"vadThold"] != nil ? [options[@"vadThold"] floatValue] : 0.6f;
+    self->recordState.vadFreqThold = options[@"vadFreqThold"] != nil ? [options[@"vadFreqThold"] floatValue] : 100.0f;
     self->recordState.audioSliceSec = audioSliceSec;
     self->recordState.isUseSlices = audioSliceSec < maxAudioSec;
@@ -90,18 +97,15 @@
 bool vad(RNWhisperContextRecordState *state, int16_t* audioBufferI16, int nSamples, int n)
 {
     bool isSpeech = true;
-    if (!state->isTranscribing && state->options[@"useVad"]) {
-        int vadSec = state->options[@"vadMs"] != nil ? [state->options[@"vadMs"] intValue] / 1000 : 2;
-        int sampleSize = vadSec * WHISPER_SAMPLE_RATE;
+    if (!state->isTranscribing && state->useVad) {
+        int sampleSize = state->vadSec * WHISPER_SAMPLE_RATE;
         if (nSamples + n > sampleSize) {
             int start = nSamples + n - sampleSize;
             std::vector<float> audioBufferF32Vec(sampleSize);
             for (int i = 0; i < sampleSize; i++) {
                 audioBufferF32Vec[i] = (float)audioBufferI16[i + start] / 32768.0f;
             }
-            float vadThold = state->options[@"vadThold"] != nil ? [state->options[@"vadThold"] floatValue] : 0.6f;
-            float vadFreqThold = state->options[@"vadFreqThold"] != nil ? [state->options[@"vadFreqThold"] floatValue] : 100.0f;
-            isSpeech = rn_whisper_vad_simple(audioBufferF32Vec, WHISPER_SAMPLE_RATE, 1000, vadThold, vadFreqThold, false);
+            isSpeech = rn_whisper_vad_simple(audioBufferF32Vec, WHISPER_SAMPLE_RATE, 1000, state->vadThold, state->vadFreqThold, false);
             NSLog(@"[RNWhisper] VAD result: %d", isSpeech);
         } else {
             isSpeech = false;
@@ -257,12 +261,12 @@ void AudioInputCallback(void * inUserData,
         result[@"isCapturing"] = @(false);
         // Save wav if needed
-        if (state->options[@"audioOutputPath"] != nil) {
+        if (state->audioOutputPath != nil) {
             // TODO: Append in real time so we don't need to keep all slices & also reduce memory usage
             [RNWhisperAudioUtils
                 saveWavFile:[RNWhisperAudioUtils concatShortBuffers:state->shortBufferSlices
                                 sliceNSamples:state->sliceNSamples]
-                audioOutputFile:state->options[@"audioOutputPath"]
+                audioOutputFile:state->audioOutputPath
             ];
         }
@@ -408,6 +412,10 @@ struct rnwhisper_segments_callback_data {
     rn_whisper_abort_transcribe(jobId);
     if (self->recordState.isRealtime && self->recordState.isCapturing) {
         [self stopAudio];
+        if (!self->recordState.isTranscribing) {
+            // Handle for VAD case
+            self->recordState.transcribeHandler(jobId, @"end", @{});
+        }
     }
     self->recordState.isCapturing = false;
     self->recordState.isStoppedByAction = true;

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "whisper.rn",
-  "version": "0.4.0-rc.1",
+  "version": "0.4.0-rc.2",
   "description": "React Native binding of whisper.cpp",
   "main": "lib/commonjs/index",
   "module": "lib/module/index",