whisper.rn 0.4.1 → 0.4.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -415,7 +415,8 @@ public class RNWhisper implements LifecycleEventListener {
       @Override
       protected WritableArray doInBackground(Void... voids) {
         try {
-          return vadContext.detectSpeechWithAudioData(audioDataBase64, options);
+          float[] audioData = AudioUtils.decodePcmData(audioDataBase64);
+          return vadContext.detectSpeechWithAudioData(audioData, audioData.length, options);
        } catch (Exception e) {
          exception = e;
          return null;
@@ -468,7 +469,7 @@ public class RNWhisper implements LifecycleEventListener {
            throw new Exception("Failed to load audio file: " + filePathOrBase64);
          }
 
-          return vadContext.detectSpeechWithAudioData(audioData, options);
+          return vadContext.detectSpeechWithAudioData(audioData, audioData.length, options);
        } catch (Exception e) {
          exception = e;
          return null;
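Note: the new AudioUtils.decodePcmData helper called in the first hunk is not itself part of this diff. Judging from the inline decoder it replaces in WhisperVadContext (removed in the next hunk), it presumably converts a base64 string of raw little-endian float32 PCM into a float[]. A minimal sketch of that assumption follows; this is a reconstruction, not the shipped implementation:

// Hypothetical reconstruction of AudioUtils.decodePcmData, based on the
// inline decoder this release removes from WhisperVadContext below.
import android.util.Base64;

class AudioUtilsSketch {
  // Decode base64-encoded raw little-endian float32 PCM into a sample array.
  static float[] decodePcmData(String audioDataBase64) {
    byte[] audioBytes = Base64.decode(audioDataBase64, Base64.DEFAULT);
    int numSamples = audioBytes.length / 4; // 4 bytes per float32 sample
    float[] audioData = new float[numSamples];
    for (int i = 0; i < numSamples; i++) {
      // Reassemble each little-endian 32-bit pattern, then reinterpret it as a float
      int intBits = (audioBytes[i * 4] & 0xFF) |
          ((audioBytes[i * 4 + 1] & 0xFF) << 8) |
          ((audioBytes[i * 4 + 2] & 0xFF) << 16) |
          ((audioBytes[i * 4 + 3] & 0xFF) << 24);
      audioData[i] = Float.intBitsToFloat(intBits);
    }
    return audioData;
  }
}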
@@ -25,70 +25,14 @@ public class WhisperVadContext {
     this.reactContext = reactContext;
   }
 
-  public WritableArray detectSpeechWithAudioData(String audioDataBase64, ReadableMap options) throws Exception {
+  public WritableArray detectSpeechWithAudioData(float[] audioData, int numSamples, ReadableMap options) throws Exception {
     if (vadContext == 0) {
       throw new Exception("VAD context is null");
     }
 
-    // Decode base64 audio data to float array
-    byte[] audioBytes = Base64.decode(audioDataBase64, Base64.DEFAULT);
-    int numSamples = audioBytes.length / 4; // 4 bytes per float
-    float[] audioData = new float[numSamples];
-
-    for (int i = 0; i < numSamples; i++) {
-      int intBits = (audioBytes[i * 4] & 0xFF) |
-          ((audioBytes[i * 4 + 1] & 0xFF) << 8) |
-          ((audioBytes[i * 4 + 2] & 0xFF) << 16) |
-          ((audioBytes[i * 4 + 3] & 0xFF) << 24);
-      audioData[i] = Float.intBitsToFloat(intBits);
-    }
-
     return processVadDetection(audioData, numSamples, options);
   }
 
-  public WritableArray detectSpeechFile(String filePathOrBase64, ReadableMap options) throws Exception {
-    if (vadContext == 0) {
-      throw new Exception("VAD context is null");
-    }
-
-    // Follow the same pattern as transcribeFile
-    String filePath = filePathOrBase64;
-
-    // Handle HTTP downloads
-    if (filePathOrBase64.startsWith("http://") || filePathOrBase64.startsWith("https://")) {
-      // Note: This would require access to the downloader, but for now we'll throw an error
-      throw new Exception("HTTP URLs not supported in VAD file detection. Please download the file first.");
-    }
-
-    float[] audioData;
-
-    // Check for resource identifier (bundled assets)
-    int resId = getResourceIdentifier(filePath);
-    if (resId > 0) {
-      audioData = AudioUtils.decodeWaveFile(reactContext.getResources().openRawResource(resId));
-    } else if (filePathOrBase64.startsWith("data:audio/wav;base64,")) {
-      // Handle base64 WAV data
-      audioData = AudioUtils.decodeWaveData(filePathOrBase64);
-    } else {
-      // Handle regular file path
-      audioData = AudioUtils.decodeWaveFile(new java.io.FileInputStream(new java.io.File(filePath)));
-    }
-
-    if (audioData == null) {
-      throw new Exception("Failed to load audio file: " + filePathOrBase64);
-    }
-
-    return processVadDetection(audioData, audioData.length, options);
-  }
-
-  public WritableArray detectSpeechWithAudioData(float[] audioData, ReadableMap options) throws Exception {
-    if (vadContext == 0) {
-      throw new Exception("VAD context is null");
-    }
-
-    return processVadDetection(audioData, audioData.length, options);
-  }
-
   private int getResourceIdentifier(String filePath) {
     int identifier = reactContext.getResources().getIdentifier(
         filePath,
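Taken together, this hunk collapses WhisperVadContext's three entry points (base64 string, file path, and float[] without an explicit length) into the single detectSpeechWithAudioData(float[], int, ReadableMap) kept above, moving audio loading and decoding to the caller. A caller-side sketch of the new contract, using only names from this diff (vadContext, options, and filePath are assumed to be in scope):

// Load a WAV file and run VAD on it, as RNWhisper.java now does (sketch).
float[] audioData = AudioUtils.decodeWaveFile(
    new java.io.FileInputStream(new java.io.File(filePath)));
WritableArray segments =
    vadContext.detectSpeechWithAudioData(audioData, audioData.length, options);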
package/ios/RNWhisper.mm CHANGED

@@ -507,13 +507,16 @@ RCT_REMAP_METHOD(vadDetectSpeech,
    }
 
    // Decode base64 audio data
-    NSData *audioData = [[NSData alloc] initWithBase64EncodedString:audioDataBase64 options:0];
-    if (audioData == nil) {
+    NSData *pcmData = [[NSData alloc] initWithBase64EncodedString:audioDataBase64 options:0];
+    if (pcmData == nil) {
      reject(@"whisper_vad_error", @"Invalid audio data", nil);
      return;
    }
 
-    NSArray *segments = [vadContext detectSpeech:audioData options:options];
+    int count = 0;
+    float *data = [RNWhisperAudioUtils decodeWaveData:pcmData count:&count cutHeader:NO];
+
+    NSArray *segments = [vadContext detectSpeech:data samplesCount:count options:options];
    resolve(segments);
 }
 
@@ -549,10 +552,7 @@ RCT_REMAP_METHOD(vadDetectSpeechFile,
      return;
    }
 
-
-    NSData *audioData = [NSData dataWithBytes:data length:count * sizeof(float)];
-
-    NSArray *segments = [vadContext detectSpeech:audioData options:options];
+    NSArray *segments = [vadContext detectSpeech:data samplesCount:count options:options];
    resolve(segments);
 }
 
@@ -23,7 +23,7 @@
 - (NSString *)reasonNoMetal;
 - (struct whisper_vad_context *)getVadContext;
 - (dispatch_queue_t)getDispatchQueue;
-- (NSArray *)detectSpeech:(NSData *)audioData options:(NSDictionary *)options;
+- (NSArray *)detectSpeech:(float *)samples samplesCount:(int)samplesCount options:(NSDictionary *)options;
 - (void)invalidate;
 
 @end
@@ -73,18 +73,14 @@
    return dQueue;
 }
 
-- (NSArray *)detectSpeech:(NSData *)audioData options:(NSDictionary *)options {
+- (NSArray *)detectSpeech:(float *)samples samplesCount:(int)samplesCount options:(NSDictionary *)options {
    if (vctx == NULL) {
        NSLog(@"VAD context is null");
        return @[];
    }
 
-    // Convert NSData to float array
-    const float *samples = (const float *)[audioData bytes];
-    int n_samples = (int)[audioData length] / sizeof(float);
-
    // Run VAD detection
-    bool speechDetected = whisper_vad_detect_speech(vctx, samples, n_samples);
+    bool speechDetected = whisper_vad_detect_speech(vctx, samples, samplesCount);
    if (!speechDetected) {
        return @[];
    }