whisper.rn 0.3.6 → 0.3.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (37)
  1. package/README.md +28 -0
  2. package/android/src/main/java/com/rnwhisper/AudioUtils.java +119 -0
  3. package/android/src/main/java/com/rnwhisper/WhisperContext.java +74 -39
  4. package/android/src/main/jni.cpp +45 -12
  5. package/android/src/newarch/java/com/rnwhisper/RNWhisperModule.java +26 -0
  6. package/cpp/rn-whisper.cpp +51 -0
  7. package/cpp/rn-whisper.h +2 -1
  8. package/ios/RNWhisper.mm +81 -22
  9. package/ios/RNWhisper.xcodeproj/project.pbxproj +27 -3
  10. package/ios/RNWhisper.xcodeproj/project.xcworkspace/xcuserdata/jhen.xcuserdatad/UserInterfaceState.xcuserstate +0 -0
  11. package/ios/RNWhisper.xcodeproj/xcuserdata/jhen.xcuserdatad/xcschemes/xcschememanagement.plist +5 -0
  12. package/ios/RNWhisperAudioSessionUtils.h +13 -0
  13. package/ios/RNWhisperAudioSessionUtils.m +85 -0
  14. package/ios/RNWhisperAudioUtils.h +9 -0
  15. package/ios/RNWhisperAudioUtils.m +83 -0
  16. package/ios/RNWhisperContext.h +1 -0
  17. package/ios/RNWhisperContext.mm +101 -28
  18. package/lib/commonjs/AudioSessionIos.js +91 -0
  19. package/lib/commonjs/AudioSessionIos.js.map +1 -0
  20. package/lib/commonjs/NativeRNWhisper.js.map +1 -1
  21. package/lib/commonjs/index.js +82 -14
  22. package/lib/commonjs/index.js.map +1 -1
  23. package/lib/module/AudioSessionIos.js +83 -0
  24. package/lib/module/AudioSessionIos.js.map +1 -0
  25. package/lib/module/NativeRNWhisper.js.map +1 -1
  26. package/lib/module/index.js +77 -14
  27. package/lib/module/index.js.map +1 -1
  28. package/lib/typescript/AudioSessionIos.d.ts +54 -0
  29. package/lib/typescript/AudioSessionIos.d.ts.map +1 -0
  30. package/lib/typescript/NativeRNWhisper.d.ts +8 -0
  31. package/lib/typescript/NativeRNWhisper.d.ts.map +1 -1
  32. package/lib/typescript/index.d.ts +62 -4
  33. package/lib/typescript/index.d.ts.map +1 -1
  34. package/package.json +1 -1
  35. package/src/AudioSessionIos.ts +90 -0
  36. package/src/NativeRNWhisper.ts +11 -1
  37. package/src/index.ts +178 -28
package/ios/RNWhisper.mm CHANGED
@@ -1,6 +1,8 @@
1
1
  #import "RNWhisper.h"
2
2
  #import "RNWhisperContext.h"
3
3
  #import "RNWhisperDownloader.h"
4
+ #import "RNWhisperAudioUtils.h"
5
+ #import "RNWhisperAudioSessionUtils.h"
4
6
  #include <stdlib.h>
5
7
  #include <string>
6
8
 
@@ -87,6 +89,7 @@ RCT_REMAP_METHOD(initContext,
87
89
  - (NSArray *)supportedEvents {
88
90
  return@[
89
91
  @"@RNWhisper_onTranscribeProgress",
92
+ @"@RNWhisper_onTranscribeNewSegments",
90
93
  @"@RNWhisper_onRealtimeTranscribe",
91
94
  @"@RNWhisper_onRealtimeTranscribeEnd",
92
95
  ];
@@ -121,7 +124,7 @@ RCT_REMAP_METHOD(transcribeFile,
121
124
  }
122
125
 
123
126
  int count = 0;
124
- float *waveFile = [self decodeWaveFile:path count:&count];
127
+ float *waveFile = [RNWhisperAudioUtils decodeWaveFile:path count:&count];
125
128
  if (waveFile == nil) {
126
129
  reject(@"whisper_error", @"Invalid file", nil);
127
130
  return;
@@ -144,6 +147,20 @@ RCT_REMAP_METHOD(transcribeFile,
144
147
  ];
145
148
  });
146
149
  }
150
+ onNewSegments: ^(NSDictionary *result) {
151
+ if (rn_whisper_transcribe_is_aborted(jobId)) {
152
+ return;
153
+ }
154
+ dispatch_async(dispatch_get_main_queue(), ^{
155
+ [self sendEventWithName:@"@RNWhisper_onTranscribeNewSegments"
156
+ body:@{
157
+ @"contextId": [NSNumber numberWithInt:contextId],
158
+ @"jobId": [NSNumber numberWithInt:jobId],
159
+ @"result": result
160
+ }
161
+ ];
162
+ });
163
+ }
147
164
  onEnd: ^(int code) {
148
165
  if (code != 0) {
149
166
  free(waveFile);
@@ -242,27 +259,6 @@ RCT_REMAP_METHOD(releaseAllContexts,
242
259
  resolve(nil);
243
260
  }
244
261
 
245
- - (float *)decodeWaveFile:(NSString*)filePath count:(int *)count {
246
- NSURL *url = [NSURL fileURLWithPath:filePath];
247
- NSData *fileData = [NSData dataWithContentsOfURL:url];
248
- if (fileData == nil) {
249
- return nil;
250
- }
251
- NSMutableData *waveData = [[NSMutableData alloc] init];
252
- [waveData appendData:[fileData subdataWithRange:NSMakeRange(44, [fileData length]-44)]];
253
- const short *shortArray = (const short *)[waveData bytes];
254
- int shortCount = (int) ([waveData length] / sizeof(short));
255
- float *floatArray = (float *) malloc(shortCount * sizeof(float));
256
- for (NSInteger i = 0; i < shortCount; i++) {
257
- float floatValue = ((float)shortArray[i]) / 32767.0;
258
- floatValue = MAX(floatValue, -1.0);
259
- floatValue = MIN(floatValue, 1.0);
260
- floatArray[i] = floatValue;
261
- }
262
- *count = shortCount;
263
- return floatArray;
264
- }
265
-
266
262
  - (void)invalidate {
267
263
  [super invalidate];
268
264
 
@@ -283,6 +279,69 @@ RCT_REMAP_METHOD(releaseAllContexts,
283
279
  [RNWhisperDownloader clearCache];
284
280
  }
285
281
 
282
+ // MARK: - AudioSessionUtils
283
+
284
+ RCT_EXPORT_METHOD(getAudioSessionCurrentCategory:(RCTPromiseResolveBlock)resolve
285
+ withRejecter:(RCTPromiseRejectBlock)reject)
286
+ {
287
+ NSString *category = [RNWhisperAudioSessionUtils getCurrentCategory];
288
+ NSArray *options = [RNWhisperAudioSessionUtils getCurrentOptions];
289
+ resolve(@{
290
+ @"category": category,
291
+ @"options": options
292
+ });
293
+ }
294
+
295
+ RCT_EXPORT_METHOD(getAudioSessionCurrentMode:(RCTPromiseResolveBlock)resolve
296
+ withRejecter:(RCTPromiseRejectBlock)reject)
297
+ {
298
+ NSString *mode = [RNWhisperAudioSessionUtils getCurrentMode];
299
+ resolve(mode);
300
+ }
301
+
302
+ RCT_REMAP_METHOD(setAudioSessionCategory,
303
+ withCategory:(NSString *)category
304
+ withOptions:(NSArray *)options
305
+ withResolver:(RCTPromiseResolveBlock)resolve
306
+ withRejecter:(RCTPromiseRejectBlock)reject)
307
+ {
308
+ NSError *error = nil;
309
+ [RNWhisperAudioSessionUtils setCategory:category options:options error:&error];
310
+ if (error != nil) {
311
+ reject(@"whisper_error", [NSString stringWithFormat:@"Failed to set category. Error: %@", error], nil);
312
+ return;
313
+ }
314
+ resolve(nil);
315
+ }
316
+
317
+ RCT_REMAP_METHOD(setAudioSessionMode,
318
+ withMode:(NSString *)mode
319
+ withResolver:(RCTPromiseResolveBlock)resolve
320
+ withRejecter:(RCTPromiseRejectBlock)reject)
321
+ {
322
+ NSError *error = nil;
323
+ [RNWhisperAudioSessionUtils setMode:mode error:&error];
324
+ if (error != nil) {
325
+ reject(@"whisper_error", [NSString stringWithFormat:@"Failed to set mode. Error: %@", error], nil);
326
+ return;
327
+ }
328
+ resolve(nil);
329
+ }
330
+
331
+ RCT_REMAP_METHOD(setAudioSessionActive,
332
+ withActive:(BOOL)active
333
+ withResolver:(RCTPromiseResolveBlock)resolve
334
+ withRejecter:(RCTPromiseRejectBlock)reject)
335
+ {
336
+ NSError *error = nil;
337
+ [RNWhisperAudioSessionUtils setActive:active error:&error];
338
+ if (error != nil) {
339
+ reject(@"whisper_error", [NSString stringWithFormat:@"Failed to set active. Error: %@", error], nil);
340
+ return;
341
+ }
342
+ resolve(nil);
343
+ }
344
+
286
345
  #ifdef RCT_NEW_ARCH_ENABLED
287
346
  - (std::shared_ptr<facebook::react::TurboModule>)getTurboModule:
288
347
  (const facebook::react::ObjCTurboModule::InitParams &)params
@@ -8,6 +8,10 @@
8
8
 
9
9
  /* Begin PBXBuildFile section */
10
10
  5E555C0D2413F4C50049A1A2 /* RNWhisper.mm in Sources */ = {isa = PBXBuildFile; fileRef = B3E7B5891CC2AC0600A0062D /* RNWhisper.mm */; };
11
+ 7F458E922AC7DC74007045F6 /* RNWhisperAudioSessionUtils.m in Sources */ = {isa = PBXBuildFile; fileRef = 7F458E912AC7DC74007045F6 /* RNWhisperAudioSessionUtils.m */; };
12
+ 7FE0BBA12ABE6C7B0049B4E4 /* RNWhisperDownloader.m in Sources */ = {isa = PBXBuildFile; fileRef = 7FE0BB9B2ABE6C7B0049B4E4 /* RNWhisperDownloader.m */; };
13
+ 7FE0BBA22ABE6C7B0049B4E4 /* RNWhisperAudioUtils.m in Sources */ = {isa = PBXBuildFile; fileRef = 7FE0BB9C2ABE6C7B0049B4E4 /* RNWhisperAudioUtils.m */; };
14
+ 7FE0BBA32ABE6C7B0049B4E4 /* RNWhisperContext.mm in Sources */ = {isa = PBXBuildFile; fileRef = 7FE0BBA02ABE6C7B0049B4E4 /* RNWhisperContext.mm */; };
11
15
  /* End PBXBuildFile section */
12
16
 
13
17
  /* Begin PBXCopyFilesBuildPhase section */
@@ -24,6 +28,15 @@
24
28
 
25
29
  /* Begin PBXFileReference section */
26
30
  134814201AA4EA6300B7C361 /* libRNWhisper.a */ = {isa = PBXFileReference; explicitFileType = archive.ar; includeInIndex = 0; path = libRNWhisper.a; sourceTree = BUILT_PRODUCTS_DIR; };
31
+ 7F458E902AC7DC74007045F6 /* RNWhisperAudioSessionUtils.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = RNWhisperAudioSessionUtils.h; sourceTree = "<group>"; };
32
+ 7F458E912AC7DC74007045F6 /* RNWhisperAudioSessionUtils.m */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.objc; path = RNWhisperAudioSessionUtils.m; sourceTree = "<group>"; };
33
+ 7FE0BB9A2ABE6C7B0049B4E4 /* RNWhisper.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = RNWhisper.h; sourceTree = "<group>"; };
34
+ 7FE0BB9B2ABE6C7B0049B4E4 /* RNWhisperDownloader.m */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.objc; path = RNWhisperDownloader.m; sourceTree = "<group>"; };
35
+ 7FE0BB9C2ABE6C7B0049B4E4 /* RNWhisperAudioUtils.m */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.objc; path = RNWhisperAudioUtils.m; sourceTree = "<group>"; };
36
+ 7FE0BB9D2ABE6C7B0049B4E4 /* RNWhisperContext.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = RNWhisperContext.h; sourceTree = "<group>"; };
37
+ 7FE0BB9E2ABE6C7B0049B4E4 /* RNWhisperDownloader.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = RNWhisperDownloader.h; sourceTree = "<group>"; };
38
+ 7FE0BB9F2ABE6C7B0049B4E4 /* RNWhisperAudioUtils.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = RNWhisperAudioUtils.h; sourceTree = "<group>"; };
39
+ 7FE0BBA02ABE6C7B0049B4E4 /* RNWhisperContext.mm */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.objcpp; path = RNWhisperContext.mm; sourceTree = "<group>"; };
27
40
  B3E7B5891CC2AC0600A0062D /* RNWhisper.mm */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.objcpp; path = RNWhisper.mm; sourceTree = "<group>"; };
28
41
  /* End PBXFileReference section */
29
42
 
@@ -49,6 +62,15 @@
49
62
  58B511D21A9E6C8500147676 = {
50
63
  isa = PBXGroup;
51
64
  children = (
65
+ 7F458E902AC7DC74007045F6 /* RNWhisperAudioSessionUtils.h */,
66
+ 7F458E912AC7DC74007045F6 /* RNWhisperAudioSessionUtils.m */,
67
+ 7FE0BB9F2ABE6C7B0049B4E4 /* RNWhisperAudioUtils.h */,
68
+ 7FE0BB9C2ABE6C7B0049B4E4 /* RNWhisperAudioUtils.m */,
69
+ 7FE0BB9A2ABE6C7B0049B4E4 /* RNWhisper.h */,
70
+ 7FE0BB9D2ABE6C7B0049B4E4 /* RNWhisperContext.h */,
71
+ 7FE0BBA02ABE6C7B0049B4E4 /* RNWhisperContext.mm */,
72
+ 7FE0BB9E2ABE6C7B0049B4E4 /* RNWhisperDownloader.h */,
73
+ 7FE0BB9B2ABE6C7B0049B4E4 /* RNWhisperDownloader.m */,
52
74
  B3E7B5891CC2AC0600A0062D /* RNWhisper.mm */,
53
75
  134814211AA4EA7D00B7C361 /* Products */,
54
76
  );
@@ -112,6 +134,10 @@
112
134
  buildActionMask = 2147483647;
113
135
  files = (
114
136
  5E555C0D2413F4C50049A1A2 /* RNWhisper.mm in Sources */,
137
+ 7FE0BBA22ABE6C7B0049B4E4 /* RNWhisperAudioUtils.m in Sources */,
138
+ 7FE0BBA32ABE6C7B0049B4E4 /* RNWhisperContext.mm in Sources */,
139
+ 7FE0BBA12ABE6C7B0049B4E4 /* RNWhisperDownloader.m in Sources */,
140
+ 7F458E922AC7DC74007045F6 /* RNWhisperAudioSessionUtils.m in Sources */,
115
141
  );
116
142
  runOnlyForDeploymentPostprocessing = 0;
117
143
  };
@@ -223,9 +249,7 @@
223
249
  "$(SRCROOT)/../../react-native/React/**",
224
250
  );
225
251
  LIBRARY_SEARCH_PATHS = "$(inherited)";
226
- OTHER_LDFLAGS = (
227
- "-ObjC",
228
- );
252
+ OTHER_LDFLAGS = "-ObjC";
229
253
  PRODUCT_NAME = RNWhisper;
230
254
  SKIP_INSTALL = YES;
231
255
  };
@@ -4,6 +4,11 @@
4
4
  <dict>
5
5
  <key>SchemeUserState</key>
6
6
  <dict>
7
+ <key>RNWhisper.xcscheme_^#shared#^_</key>
8
+ <dict>
9
+ <key>orderHint</key>
10
+ <integer>0</integer>
11
+ </dict>
7
12
  <key>WhisperCpp.xcscheme_^#shared#^_</key>
8
13
  <dict>
9
14
  <key>orderHint</key>
@@ -0,0 +1,13 @@
1
+ #import <Foundation/Foundation.h>
2
+ #import <AVFoundation/AVFoundation.h>
3
+
4
+ @interface RNWhisperAudioSessionUtils : NSObject
5
+
6
+ +(NSString *)getCurrentCategory;
7
+ +(NSArray *)getCurrentOptions;
8
+ +(NSString *)getCurrentMode;
9
+ +(void)setCategory:(NSString *)category options:(NSArray *)options error:(NSError **)error;
10
+ +(void)setMode:(NSString *)mode error:(NSError **)error;
11
+ +(void)setActive:(BOOL)active error:(NSError **)error;
12
+
13
+ @end
@@ -0,0 +1,85 @@
1
+ #import "RNWhisperAudioSessionUtils.h"
2
+
3
+ @implementation RNWhisperAudioSessionUtils
4
+
5
+ static NSDictionary *_categories;
6
+ static NSDictionary *_options;
7
+ static NSDictionary *_modes;
8
+
9
+ + (void)initialize {
10
+ _categories = @{
11
+ @"Ambient": AVAudioSessionCategoryAmbient,
12
+ @"SoloAmbient": AVAudioSessionCategorySoloAmbient,
13
+ @"Playback": AVAudioSessionCategoryPlayback,
14
+ @"Record": AVAudioSessionCategoryRecord,
15
+ @"PlayAndRecord": AVAudioSessionCategoryPlayAndRecord,
16
+ @"MultiRoute": AVAudioSessionCategoryMultiRoute
17
+ };
18
+ _options = @{
19
+ @"MixWithOthers": @(AVAudioSessionCategoryOptionMixWithOthers),
20
+ @"DuckOthers": @(AVAudioSessionCategoryOptionDuckOthers),
21
+ @"InterruptSpokenAudioAndMixWithOthers": @(AVAudioSessionCategoryOptionInterruptSpokenAudioAndMixWithOthers),
22
+ @"AllowBluetooth": @(AVAudioSessionCategoryOptionAllowBluetooth),
23
+ @"AllowBluetoothA2DP": @(AVAudioSessionCategoryOptionAllowBluetoothA2DP),
24
+ @"AllowAirPlay": @(AVAudioSessionCategoryOptionAllowAirPlay),
25
+ @"DefaultToSpeaker": @(AVAudioSessionCategoryOptionDefaultToSpeaker)
26
+ };
27
+ _modes = @{
28
+ @"Default": AVAudioSessionModeDefault,
29
+ @"VoiceChat": AVAudioSessionModeVoiceChat,
30
+ @"VideoChat": AVAudioSessionModeVideoChat,
31
+ @"GameChat": AVAudioSessionModeGameChat,
32
+ @"VideoRecording": AVAudioSessionModeVideoRecording,
33
+ @"Measurement": AVAudioSessionModeMeasurement,
34
+ @"MoviePlayback": AVAudioSessionModeMoviePlayback,
35
+ @"SpokenAudio": AVAudioSessionModeSpokenAudio
36
+ };
37
+ }
38
+
39
+ +(NSString *)getCurrentCategory {
40
+ AVAudioSession *session = [AVAudioSession sharedInstance];
41
+ return session.category;
42
+ }
43
+
44
+ +(NSArray *)getCurrentOptions {
45
+ AVAudioSession *session = [AVAudioSession sharedInstance];
46
+ AVAudioSessionCategoryOptions options = session.categoryOptions;
47
+ NSMutableArray *result = [NSMutableArray array];
48
+ for (NSString *key in _options) {
49
+ if ((options & [[_options objectForKey:key] unsignedIntegerValue]) != 0) {
50
+ [result addObject:key];
51
+ }
52
+ }
53
+ return result;
54
+ }
55
+
56
+ +(NSString *)getCurrentMode {
57
+ AVAudioSession *session = [AVAudioSession sharedInstance];
58
+ return session.mode;
59
+ }
60
+
61
+ +(AVAudioSessionCategoryOptions)getOptions:(NSArray *)options {
62
+ AVAudioSessionCategoryOptions result = 0;
63
+ for (NSString *option in options) {
64
+ result |= [[_options objectForKey:option] unsignedIntegerValue];
65
+ }
66
+ return result;
67
+ }
68
+
69
+ +(void)setCategory:(NSString *)category options:(NSArray *)options error:(NSError **)error {
70
+ AVAudioSession *session = [AVAudioSession sharedInstance];
71
+ [session setCategory:[_categories objectForKey:category] withOptions:[self getOptions:options] error:error];
72
+ }
73
+
74
+ +(void)setMode:(NSString *)mode error:(NSError **)error {
75
+ AVAudioSession *session = [AVAudioSession sharedInstance];
76
+ [session setMode:[_modes objectForKey:mode] error:error];
77
+ }
78
+
79
+ +(void)setActive:(BOOL)active error:(NSError **)error {
80
+ AVAudioSession *session = [AVAudioSession sharedInstance];
81
+ [session setActive:active error:error];
82
+ }
83
+
84
+
85
+ @end
@@ -0,0 +1,9 @@
1
+ #import <Foundation/Foundation.h>
2
+
3
+ @interface RNWhisperAudioUtils : NSObject
4
+
5
+ + (NSData *)concatShortBuffers:(NSMutableArray<NSValue *> *)buffers sliceNSamples:(NSMutableArray<NSNumber *> *)sliceNSamples;
6
+ + (void)saveWavFile:(NSData *)rawData audioOutputFile:(NSString *)audioOutputFile;
7
+ + (float *)decodeWaveFile:(NSString*)filePath count:(int *)count;
8
+
9
+ @end
@@ -0,0 +1,83 @@
1
+ #import "RNWhisperAudioUtils.h"
2
+ #import "whisper.h"
3
+
4
+ @implementation RNWhisperAudioUtils
5
+
6
+ + (NSData *)concatShortBuffers:(NSMutableArray<NSValue *> *)buffers sliceNSamples:(NSMutableArray<NSNumber *> *)sliceNSamples {
7
+ NSMutableData *outputData = [NSMutableData data];
8
+ for (int i = 0; i < buffers.count; i++) {
9
+ int size = [sliceNSamples objectAtIndex:i].intValue;
10
+ NSValue *buffer = [buffers objectAtIndex:i];
11
+ short *bufferPtr = buffer.pointerValue;
12
+ [outputData appendBytes:bufferPtr length:size * sizeof(short)];
13
+ }
14
+ return outputData;
15
+ }
16
+
17
+ + (void)saveWavFile:(NSData *)rawData audioOutputFile:(NSString *)audioOutputFile {
18
+ NSMutableData *outputData = [NSMutableData data];
19
+
20
+ // WAVE header
21
+ [outputData appendData:[@"RIFF" dataUsingEncoding:NSUTF8StringEncoding]]; // chunk id
22
+ int chunkSize = CFSwapInt32HostToLittle(36 + rawData.length);
23
+ [outputData appendBytes:&chunkSize length:sizeof(chunkSize)];
24
+ [outputData appendData:[@"WAVE" dataUsingEncoding:NSUTF8StringEncoding]]; // format
25
+ [outputData appendData:[@"fmt " dataUsingEncoding:NSUTF8StringEncoding]]; // subchunk 1 id
26
+
27
+ int subchunk1Size = CFSwapInt32HostToLittle(16);
28
+ [outputData appendBytes:&subchunk1Size length:sizeof(subchunk1Size)];
29
+
30
+ short audioFormat = CFSwapInt16HostToLittle(1); // PCM
31
+ [outputData appendBytes:&audioFormat length:sizeof(audioFormat)];
32
+
33
+ short numChannels = CFSwapInt16HostToLittle(1); // mono
34
+ [outputData appendBytes:&numChannels length:sizeof(numChannels)];
35
+
36
+ int sampleRate = CFSwapInt32HostToLittle(WHISPER_SAMPLE_RATE);
37
+ [outputData appendBytes:&sampleRate length:sizeof(sampleRate)];
38
+
39
+ // (bitDepth * sampleRate * channels) >> 3
40
+ int byteRate = CFSwapInt32HostToLittle(WHISPER_SAMPLE_RATE * 1 * 16 / 8);
41
+ [outputData appendBytes:&byteRate length:sizeof(byteRate)];
42
+
43
+ // (bitDepth * channels) >> 3
44
+ short blockAlign = CFSwapInt16HostToLittle(16 / 8);
45
+ [outputData appendBytes:&blockAlign length:sizeof(blockAlign)];
46
+
47
+ // bitDepth
48
+ short bitsPerSample = CFSwapInt16HostToLittle(16);
49
+ [outputData appendBytes:&bitsPerSample length:sizeof(bitsPerSample)];
50
+
51
+ [outputData appendData:[@"data" dataUsingEncoding:NSUTF8StringEncoding]]; // subchunk 2 id
52
+ int subchunk2Size = CFSwapInt32HostToLittle((int)rawData.length);
53
+ [outputData appendBytes:&subchunk2Size length:sizeof(subchunk2Size)];
54
+
55
+ // Audio data
56
+ [outputData appendData:rawData];
57
+
58
+ // Save to file
59
+ [outputData writeToFile:audioOutputFile atomically:YES];
60
+ }
61
+
62
+ + (float *)decodeWaveFile:(NSString*)filePath count:(int *)count {
63
+ NSURL *url = [NSURL fileURLWithPath:filePath];
64
+ NSData *fileData = [NSData dataWithContentsOfURL:url];
65
+ if (fileData == nil) {
66
+ return nil;
67
+ }
68
+ NSMutableData *waveData = [[NSMutableData alloc] init];
69
+ [waveData appendData:[fileData subdataWithRange:NSMakeRange(44, [fileData length]-44)]];
70
+ const short *shortArray = (const short *)[waveData bytes];
71
+ int shortCount = (int) ([waveData length] / sizeof(short));
72
+ float *floatArray = (float *) malloc(shortCount * sizeof(float));
73
+ for (NSInteger i = 0; i < shortCount; i++) {
74
+ float floatValue = ((float)shortArray[i]) / 32767.0;
75
+ floatValue = MAX(floatValue, -1.0);
76
+ floatValue = MIN(floatValue, 1.0);
77
+ floatArray[i] = floatValue;
78
+ }
79
+ *count = shortCount;
80
+ return floatArray;
81
+ }
82
+
83
+ @end
@@ -53,6 +53,7 @@ typedef struct {
53
53
  audioDataCount:(int)audioDataCount
54
54
  options:(NSDictionary *)options
55
55
  onProgress:(void (^)(int))onProgress
56
+ onNewSegments:(void (^)(NSDictionary *))onNewSegments
56
57
  onEnd:(void (^)(int))onEnd;
57
58
  - (void)stopTranscribe:(int)jobId;
58
59
  - (void)stopCurrentTranscribe;
@@ -1,4 +1,6 @@
1
1
  #import "RNWhisperContext.h"
2
+ #import "RNWhisperAudioUtils.h"
3
+ #include <vector>
2
4
 
3
5
  #define NUM_BYTES_PER_BUFFER 16 * 1024
4
6
 
@@ -77,6 +79,29 @@
77
79
  }
78
80
  }
79
81
 
82
+ bool vad(RNWhisperContextRecordState *state, int16_t* audioBufferI16, int nSamples, int n)
83
+ {
84
+ bool isSpeech = true;
85
+ if (!state->isTranscribing && state->options[@"useVad"]) {
86
+ int vadSec = state->options[@"vadMs"] != nil ? [state->options[@"vadMs"] intValue] / 1000 : 2;
87
+ int sampleSize = vadSec * WHISPER_SAMPLE_RATE;
88
+ if (nSamples + n > sampleSize) {
89
+ int start = nSamples + n - sampleSize;
90
+ std::vector<float> audioBufferF32Vec(sampleSize);
91
+ for (int i = 0; i < sampleSize; i++) {
92
+ audioBufferF32Vec[i] = (float)audioBufferI16[i + start] / 32768.0f;
93
+ }
94
+ float vadThold = state->options[@"vadThold"] != nil ? [state->options[@"vadThold"] floatValue] : 0.6f;
95
+ float vadFreqThold = state->options[@"vadFreqThold"] != nil ? [state->options[@"vadFreqThold"] floatValue] : 100.0f;
96
+ isSpeech = rn_whisper_vad_simple(audioBufferF32Vec, WHISPER_SAMPLE_RATE, 1000, vadThold, vadFreqThold, false);
97
+ NSLog(@"[RNWhisper] VAD result: %d", isSpeech);
98
+ } else {
99
+ isSpeech = false;
100
+ }
101
+ }
102
+ return isSpeech;
103
+ }
104
+
80
105
  void AudioInputCallback(void * inUserData,
81
106
  AudioQueueRef inAQ,
82
107
  AudioQueueBufferRef inBuffer,
@@ -117,6 +142,11 @@ void AudioInputCallback(void * inUserData,
117
142
  !state->isTranscribing &&
118
143
  nSamples != state->nSamplesTranscribing
119
144
  ) {
145
+ int16_t* audioBufferI16 = (int16_t*) [state->shortBufferSlices[state->sliceIndex] pointerValue];
146
+ if (!vad(state, audioBufferI16, nSamples, 0)) {
147
+ state->transcribeHandler(state->jobId, @"end", @{});
148
+ return;
149
+ }
120
150
  state->isTranscribing = true;
121
151
  dispatch_async([state->mSelf getDispatchQueue], ^{
122
152
  [state->mSelf fullTranscribeSamples:state];
@@ -142,11 +172,15 @@ void AudioInputCallback(void * inUserData,
142
172
  for (int i = 0; i < n; i++) {
143
173
  audioBufferI16[nSamples + i] = ((short*)inBuffer->mAudioData)[i];
144
174
  }
175
+
176
+ bool isSpeech = vad(state, audioBufferI16, nSamples, n);
145
177
  nSamples += n;
146
178
  state->sliceNSamples[state->sliceIndex] = [NSNumber numberWithInt:nSamples];
147
179
 
148
180
  AudioQueueEnqueueBuffer(state->queue, inBuffer, 0, NULL);
149
181
 
182
+ if (!isSpeech) return;
183
+
150
184
  if (!state->isTranscribing) {
151
185
  state->isTranscribing = true;
152
186
  dispatch_async([state->mSelf getDispatchQueue], ^{
@@ -167,7 +201,8 @@ void AudioInputCallback(void * inUserData,
167
201
  audioBufferF32[i] = (float)audioBufferI16[i] / 32768.0f;
168
202
  }
169
203
  CFTimeInterval timeStart = CACurrentMediaTime();
170
- int code = [state->mSelf fullTranscribe:state->jobId audioData:audioBufferF32 audioDataCount:state->nSamplesTranscribing options:state->options];
204
+ struct whisper_full_params params = [state->mSelf getParams:state->options jobId:state->jobId];
205
+ int code = [state->mSelf fullTranscribe:state->jobId params:params audioData:audioBufferF32 audioDataCount:state->nSamplesTranscribing];
171
206
  free(audioBufferF32);
172
207
  CFTimeInterval timeEnd = CACurrentMediaTime();
173
208
  const float timeRecording = (float) state->nSamplesTranscribing / (float) state->dataFormat.mSampleRate;
@@ -212,6 +247,17 @@ void AudioInputCallback(void * inUserData,
212
247
  NSLog(@"[RNWhisper] Transcribe end");
213
248
  result[@"isStoppedByAction"] = @(state->isStoppedByAction);
214
249
  result[@"isCapturing"] = @(false);
250
+
251
+ // Save wav if needed
252
+ if (state->options[@"audioOutputPath"] != nil) {
253
+ // TODO: Append in real time so we don't need to keep all slices & also reduce memory usage
254
+ [RNWhisperAudioUtils
255
+ saveWavFile:[RNWhisperAudioUtils concatShortBuffers:state->shortBufferSlices
256
+ sliceNSamples:state->sliceNSamples]
257
+ audioOutputFile:state->options[@"audioOutputPath"]
258
+ ];
259
+ }
260
+
215
261
  state->transcribeHandler(state->jobId, @"end", result);
216
262
  } else if (code == 0) {
217
263
  result[@"isCapturing"] = @(true);
@@ -272,18 +318,70 @@ void AudioInputCallback(void * inUserData,
272
318
  return status;
273
319
  }
274
320
 
321
+ struct rnwhisper_segments_callback_data {
322
+ void (^onNewSegments)(NSDictionary *);
323
+ int total_n_new;
324
+ };
325
+
275
326
  - (void)transcribeFile:(int)jobId
276
327
  audioData:(float *)audioData
277
328
  audioDataCount:(int)audioDataCount
278
329
  options:(NSDictionary *)options
279
330
  onProgress:(void (^)(int))onProgress
331
+ onNewSegments:(void (^)(NSDictionary *))onNewSegments
280
332
  onEnd:(void (^)(int))onEnd
281
333
  {
282
334
  dispatch_async(dQueue, ^{
283
335
  self->recordState.isStoppedByAction = false;
284
336
  self->recordState.isTranscribing = true;
285
337
  self->recordState.jobId = jobId;
286
- int code = [self fullTranscribeWithProgress:onProgress jobId:jobId audioData:audioData audioDataCount:audioDataCount options:options];
338
+
339
+ whisper_full_params params = [self getParams:options jobId:jobId];
340
+ if (options[@"onProgress"] && [options[@"onProgress"] boolValue]) {
341
+ params.progress_callback = [](struct whisper_context * /*ctx*/, struct whisper_state * /*state*/, int progress, void * user_data) {
342
+ void (^onProgress)(int) = (__bridge void (^)(int))user_data;
343
+ onProgress(progress);
344
+ };
345
+ params.progress_callback_user_data = (__bridge void *)(onProgress);
346
+ }
347
+
348
+ if (options[@"onNewSegments"] && [options[@"onNewSegments"] boolValue]) {
349
+ params.new_segment_callback = [](struct whisper_context * ctx, struct whisper_state * /*state*/, int n_new, void * user_data) {
350
+ struct rnwhisper_segments_callback_data *data = (struct rnwhisper_segments_callback_data *)user_data;
351
+ data->total_n_new += n_new;
352
+
353
+ NSString *text = @"";
354
+ NSMutableArray *segments = [[NSMutableArray alloc] init];
355
+ for (int i = data->total_n_new - n_new; i < data->total_n_new; i++) {
356
+ const char * text_cur = whisper_full_get_segment_text(ctx, i);
357
+ text = [text stringByAppendingString:[NSString stringWithUTF8String:text_cur]];
358
+
359
+ const int64_t t0 = whisper_full_get_segment_t0(ctx, i);
360
+ const int64_t t1 = whisper_full_get_segment_t1(ctx, i);
361
+ NSDictionary *segment = @{
362
+ @"text": [NSString stringWithUTF8String:text_cur],
363
+ @"t0": [NSNumber numberWithLongLong:t0],
364
+ @"t1": [NSNumber numberWithLongLong:t1]
365
+ };
366
+ [segments addObject:segment];
367
+ }
368
+
369
+ NSDictionary *result = @{
370
+ @"nNew": [NSNumber numberWithInt:n_new],
371
+ @"totalNNew": [NSNumber numberWithInt:data->total_n_new],
372
+ @"result": text,
373
+ @"segments": segments
374
+ };
375
+ void (^onNewSegments)(NSDictionary *) = (void (^)(NSDictionary *))data->onNewSegments;
376
+ onNewSegments(result);
377
+ };
378
+ struct rnwhisper_segments_callback_data user_data = {
379
+ .onNewSegments = onNewSegments,
380
+ .total_n_new = 0
381
+ };
382
+ params.new_segment_callback_user_data = &user_data;
383
+ }
384
+ int code = [self fullTranscribe:jobId params:params audioData:audioData audioDataCount:audioDataCount];
287
385
  self->recordState.jobId = -1;
288
386
  self->recordState.isTranscribing = false;
289
387
  onEnd(code);
@@ -383,36 +481,11 @@ void AudioInputCallback(void * inUserData,
383
481
  return params;
384
482
  }
385
483
 
386
- - (int)fullTranscribeWithProgress:(void (^)(int))onProgress
387
- jobId:(int)jobId
388
- audioData:(float *)audioData
389
- audioDataCount:(int)audioDataCount
390
- options:(NSDictionary *)options
391
- {
392
- struct whisper_full_params params = [self getParams:options jobId:jobId];
393
- if (options[@"onProgress"] && [options[@"onProgress"] boolValue]) {
394
- params.progress_callback = [](struct whisper_context * /*ctx*/, struct whisper_state * /*state*/, int progress, void * user_data) {
395
- void (^onProgress)(int) = (__bridge void (^)(int))user_data;
396
- onProgress(progress);
397
- };
398
- params.progress_callback_user_data = (__bridge void *)(onProgress);
399
- }
400
- whisper_reset_timings(self->ctx);
401
-
402
- int code = whisper_full(self->ctx, params, audioData, audioDataCount);
403
- rn_whisper_remove_abort_map(jobId);
404
- // if (code == 0) {
405
- // whisper_print_timings(self->ctx);
406
- // }
407
- return code;
408
- }
409
-
410
484
  - (int)fullTranscribe:(int)jobId
485
+ params:(struct whisper_full_params)params
411
486
  audioData:(float *)audioData
412
487
  audioDataCount:(int)audioDataCount
413
- options:(NSDictionary *)options
414
488
  {
415
- struct whisper_full_params params = [self getParams:options jobId:jobId];
416
489
  whisper_reset_timings(self->ctx);
417
490
 
418
491
  int code = whisper_full(self->ctx, params, audioData, audioDataCount);