whisper.rn 0.4.0 → 0.4.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (31)
  1. package/README.md +69 -0
  2. package/android/src/main/java/com/rnwhisper/RNWhisper.java +212 -0
  3. package/android/src/main/java/com/rnwhisper/WhisperContext.java +34 -4
  4. package/android/src/main/java/com/rnwhisper/WhisperVadContext.java +101 -0
  5. package/android/src/main/jni.cpp +196 -0
  6. package/android/src/main/jniLibs/arm64-v8a/librnwhisper.so +0 -0
  7. package/android/src/main/jniLibs/arm64-v8a/librnwhisper_v8fp16_va_2.so +0 -0
  8. package/android/src/main/jniLibs/armeabi-v7a/librnwhisper.so +0 -0
  9. package/android/src/main/jniLibs/armeabi-v7a/librnwhisper_vfpv4.so +0 -0
  10. package/android/src/main/jniLibs/x86_64/librnwhisper.so +0 -0
  11. package/android/src/main/jniLibs/x86_64/librnwhisper_x86_64.so +0 -0
  12. package/android/src/newarch/java/com/rnwhisper/RNWhisperModule.java +26 -0
  13. package/android/src/oldarch/java/com/rnwhisper/RNWhisperModule.java +26 -0
  14. package/ios/RNWhisper.mm +147 -0
  15. package/ios/RNWhisperContext.mm +18 -24
  16. package/ios/RNWhisperVadContext.h +29 -0
  17. package/ios/RNWhisperVadContext.mm +148 -0
  18. package/jest/mock.js +19 -0
  19. package/lib/commonjs/NativeRNWhisper.js.map +1 -1
  20. package/lib/commonjs/index.js +111 -1
  21. package/lib/commonjs/index.js.map +1 -1
  22. package/lib/module/NativeRNWhisper.js.map +1 -1
  23. package/lib/module/index.js +112 -0
  24. package/lib/module/index.js.map +1 -1
  25. package/lib/typescript/NativeRNWhisper.d.ts +35 -0
  26. package/lib/typescript/NativeRNWhisper.d.ts.map +1 -1
  27. package/lib/typescript/index.d.ts +39 -3
  28. package/lib/typescript/index.d.ts.map +1 -1
  29. package/package.json +1 -1
  30. package/src/NativeRNWhisper.ts +48 -0
  31. package/src/index.ts +132 -1
package/android/src/newarch/java/com/rnwhisper/RNWhisperModule.java CHANGED
@@ -77,6 +77,32 @@ public class RNWhisperModule extends NativeRNWhisperSpec {
      rnwhisper.releaseAllContexts(promise);
    }
 
+   // VAD methods
+   @ReactMethod
+   public void initVadContext(final ReadableMap options, final Promise promise) {
+     rnwhisper.initVadContext(options, promise);
+   }
+
+   @ReactMethod
+   public void vadDetectSpeech(double id, String audioDataBase64, ReadableMap options, Promise promise) {
+     rnwhisper.vadDetectSpeech(id, audioDataBase64, options, promise);
+   }
+
+   @ReactMethod
+   public void vadDetectSpeechFile(double id, String filePath, ReadableMap options, Promise promise) {
+     rnwhisper.vadDetectSpeechFile(id, filePath, options, promise);
+   }
+
+   @ReactMethod
+   public void releaseVadContext(double id, Promise promise) {
+     rnwhisper.releaseVadContext(id, promise);
+   }
+
+   @ReactMethod
+   public void releaseAllVadContexts(Promise promise) {
+     rnwhisper.releaseAllVadContexts(promise);
+   }
+
    /*
     * iOS Specific methods, left here for make the turbo module happy:
     */
package/android/src/oldarch/java/com/rnwhisper/RNWhisperModule.java CHANGED
@@ -76,4 +76,30 @@ public class RNWhisperModule extends ReactContextBaseJavaModule {
    public void releaseAllContexts(Promise promise) {
      rnwhisper.releaseAllContexts(promise);
    }
+
+   // VAD methods
+   @ReactMethod
+   public void initVadContext(final ReadableMap options, final Promise promise) {
+     rnwhisper.initVadContext(options, promise);
+   }
+
+   @ReactMethod
+   public void vadDetectSpeech(double id, String audioDataBase64, ReadableMap options, Promise promise) {
+     rnwhisper.vadDetectSpeech(id, audioDataBase64, options, promise);
+   }
+
+   @ReactMethod
+   public void vadDetectSpeechFile(double id, String filePath, ReadableMap options, Promise promise) {
+     rnwhisper.vadDetectSpeechFile(id, filePath, options, promise);
+   }
+
+   @ReactMethod
+   public void releaseVadContext(double id, Promise promise) {
+     rnwhisper.releaseVadContext(id, promise);
+   }
+
+   @ReactMethod
+   public void releaseAllVadContexts(Promise promise) {
+     rnwhisper.releaseAllVadContexts(promise);
+   }
  }
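
Both bridge classes register the same five VAD methods, so the JavaScript layer sees an identical surface on the old and new React Native architectures. A rough sketch of how these bridge methods are reached from JS (normally via the initWhisperVad wrapper shown later in this diff; the option values and file paths below are illustrative assumptions, not package defaults):

```ts
import { NativeModules } from 'react-native'

// Hypothetical direct call into the bridge methods declared above.
async function vadViaBridge(modelPath: string, wavPath: string) {
  const { RNWhisper } = NativeModules

  // initVadContext(options, promise) resolves with { contextId, gpu, reasonNoGPU }
  const { contextId, gpu, reasonNoGPU } = await RNWhisper.initVadContext({
    filePath: modelPath, // local path to a VAD model file
    useGpu: true,
    nThreads: 4,
  })
  console.log(`VAD context ${contextId}, gpu=${gpu} (${reasonNoGPU})`)

  // vadDetectSpeechFile(id, filePath, options, promise) resolves with speech segments
  const segments = await RNWhisper.vadDetectSpeechFile(contextId, wavPath, {})

  // releaseVadContext(id, promise) frees the native context
  await RNWhisper.releaseVadContext(contextId)
  return segments
}
```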
package/ios/RNWhisper.mm CHANGED
@@ -1,5 +1,6 @@
  #import "RNWhisper.h"
  #import "RNWhisperContext.h"
+ #import "RNWhisperVadContext.h"
  #import "RNWhisperDownloader.h"
  #import "RNWhisperAudioUtils.h"
  #import "RNWhisperAudioSessionUtils.h"
@@ -13,6 +14,7 @@
  @implementation RNWhisper
 
  NSMutableDictionary *contexts;
+ NSMutableDictionary *vadContexts;
 
  RCT_EXPORT_MODULE()
 
@@ -366,6 +368,15 @@ RCT_REMAP_METHOD(releaseAllContexts,
      [context invalidate];
  }
 
+ if (vadContexts != nil) {
+   for (NSNumber *contextId in vadContexts) {
+     RNWhisperVadContext *vadContext = vadContexts[contextId];
+     [vadContext invalidate];
+   }
+   [vadContexts removeAllObjects];
+   vadContexts = nil;
+ }
+
  rnwhisper::job_abort_all(); // graceful abort
 
  [contexts removeAllObjects];
@@ -437,6 +448,142 @@ RCT_REMAP_METHOD(setAudioSessionActive,
    resolve(nil);
  }
 
+ RCT_REMAP_METHOD(initVadContext,
+   withVadOptions:(NSDictionary *)vadOptions
+   withResolver:(RCTPromiseResolveBlock)resolve
+   withRejecter:(RCTPromiseRejectBlock)reject)
+ {
+   if (vadContexts == nil) {
+     vadContexts = [[NSMutableDictionary alloc] init];
+   }
+
+   NSString *modelPath = [vadOptions objectForKey:@"filePath"];
+   BOOL isBundleAsset = [[vadOptions objectForKey:@"isBundleAsset"] boolValue];
+   BOOL useGpu = [[vadOptions objectForKey:@"useGpu"] boolValue];
+   NSNumber *nThreads = [vadOptions objectForKey:@"nThreads"];
+
+   NSString *path = modelPath;
+   if ([path hasPrefix:@"http://"] || [path hasPrefix:@"https://"]) {
+     path = [RNWhisperDownloader downloadFile:path toFile:nil];
+   }
+   if (isBundleAsset) {
+     path = [[NSBundle mainBundle] pathForResource:modelPath ofType:nil];
+   }
+
+   int contextId = arc4random_uniform(1000000);
+
+   RNWhisperVadContext *vadContext = [RNWhisperVadContext
+     initWithModelPath:path
+     contextId:contextId
+     noMetal:!useGpu
+     nThreads:nThreads
+   ];
+   if ([vadContext getVadContext] == NULL) {
+     reject(@"whisper_vad_error", @"Failed to load the VAD model", nil);
+     return;
+   }
+
+   [vadContexts setObject:vadContext forKey:[NSNumber numberWithInt:contextId]];
+
+   resolve(@{
+     @"contextId": @(contextId),
+     @"gpu": @([vadContext isMetalEnabled]),
+     @"reasonNoGPU": [vadContext reasonNoMetal],
+   });
+ }
+
+ RCT_REMAP_METHOD(vadDetectSpeech,
+   withContextId:(int)contextId
+   withAudioData:(NSString *)audioDataBase64
+   withOptions:(NSDictionary *)options
+   withResolver:(RCTPromiseResolveBlock)resolve
+   withRejecter:(RCTPromiseRejectBlock)reject)
+ {
+   RNWhisperVadContext *vadContext = vadContexts[[NSNumber numberWithInt:contextId]];
+
+   if (vadContext == nil) {
+     reject(@"whisper_vad_error", @"VAD context not found", nil);
+     return;
+   }
+
+   // Decode base64 audio data
+   NSData *pcmData = [[NSData alloc] initWithBase64EncodedString:audioDataBase64 options:0];
+   if (pcmData == nil) {
+     reject(@"whisper_vad_error", @"Invalid audio data", nil);
+     return;
+   }
+
+   int count = 0;
+   float *data = [RNWhisperAudioUtils decodeWaveData:pcmData count:&count cutHeader:NO];
+
+   NSArray *segments = [vadContext detectSpeech:data samplesCount:count options:options];
+   resolve(segments);
+ }
+
+ RCT_REMAP_METHOD(vadDetectSpeechFile,
+   withVadContextId:(int)contextId
+   withFilePath:(NSString *)filePath
+   withOptions:(NSDictionary *)options
+   withResolver:(RCTPromiseResolveBlock)resolve
+   withRejecter:(RCTPromiseRejectBlock)reject)
+ {
+   RNWhisperVadContext *vadContext = vadContexts[[NSNumber numberWithInt:contextId]];
+
+   if (vadContext == nil) {
+     reject(@"whisper_vad_error", @"VAD context not found", nil);
+     return;
+   }
+
+   // Handle different input types like transcribeFile does
+   float *data = nil;
+   int count = 0;
+   if ([filePath hasPrefix:@"http://"] || [filePath hasPrefix:@"https://"]) {
+     NSString *path = [RNWhisperDownloader downloadFile:filePath toFile:nil];
+     data = [RNWhisperAudioUtils decodeWaveFile:path count:&count];
+   } else if ([filePath hasPrefix:@"data:audio/wav;base64,"]) {
+     NSData *waveData = [[NSData alloc] initWithBase64EncodedString:[filePath substringFromIndex:22] options:0];
+     data = [RNWhisperAudioUtils decodeWaveData:waveData count:&count cutHeader:YES];
+   } else {
+     data = [RNWhisperAudioUtils decodeWaveFile:filePath count:&count];
+   }
+
+   if (data == nil) {
+     reject(@"whisper_vad_error", @"Failed to load or decode audio file", nil);
+     return;
+   }
+
+   NSArray *segments = [vadContext detectSpeech:data samplesCount:count options:options];
+   resolve(segments);
+ }
+
+ RCT_REMAP_METHOD(releaseVadContext,
+   withVadContextId:(int)contextId
+   withResolver:(RCTPromiseResolveBlock)resolve
+   withRejecter:(RCTPromiseRejectBlock)reject)
+ {
+   RNWhisperVadContext *vadContext = vadContexts[[NSNumber numberWithInt:contextId]];
+   if (vadContext == nil) {
+     reject(@"whisper_vad_error", @"VAD context not found", nil);
+     return;
+   }
+   [vadContext invalidate];
+   [vadContexts removeObjectForKey:[NSNumber numberWithInt:contextId]];
+   resolve(nil);
+ }
+
+ RCT_EXPORT_METHOD(releaseAllVadContexts:(RCTPromiseResolveBlock)resolve
+   withRejecter:(RCTPromiseRejectBlock)reject)
+ {
+   if (vadContexts != nil) {
+     for (NSNumber *contextId in vadContexts) {
+       RNWhisperVadContext *vadContext = vadContexts[contextId];
+       [vadContext invalidate];
+     }
+     [vadContexts removeAllObjects];
+   }
+   resolve(nil);
+ }
+
  #ifdef RCT_NEW_ARCH_ENABLED
  - (std::shared_ptr<facebook::react::TurboModule>)getTurboModule:
      (const facebook::react::ObjCTurboModule::InitParams &)params
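
As the branches in vadDetectSpeechFile show, the native side accepts three filePath shapes, mirroring transcribeFile: an http(s) URL (downloaded first), an inline `data:audio/wav;base64,` payload (header stripped, then decoded), and a plain local path. A hedged sketch of the three call shapes (module access and paths are placeholders; note that the JS wrapper added later in this diff rejects remote URLs before they reach this native method):

```ts
import { NativeModules } from 'react-native'

// Illustrative only: the three filePath forms handled by vadDetectSpeechFile above.
async function detectFromDifferentSources(contextId: number, wavBase64: string) {
  const { RNWhisper } = NativeModules
  const remote = await RNWhisper.vadDetectSpeechFile(contextId, 'https://example.com/audio.wav', {})
  const inline = await RNWhisper.vadDetectSpeechFile(contextId, `data:audio/wav;base64,${wavBase64}`, {})
  const local = await RNWhisper.vadDetectSpeechFile(contextId, '/path/to/recording.wav', {})
  return { remote, inline, local }
}
```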
package/ios/RNWhisperContext.mm CHANGED
@@ -36,36 +36,30 @@
      NSLog(@"[RNWhisper] ggml-metal is not enabled in this build, ignoring use_gpu option");
      cparams.use_gpu = false;
    }
+   reasonNoMetal = @"Metal is not enabled in this build";
  #endif
 
  #ifdef WSP_GGML_USE_METAL
    if (cparams.use_gpu) {
- #if TARGET_OS_SIMULATOR
-     NSLog(@"[RNWhisper] ggml-metal is not available in simulator, ignoring use_gpu option: %@", reasonNoMetal);
-     cparams.use_gpu = false;
- #else // TARGET_OS_SIMULATOR
-     // Check ggml-metal availability
-     NSError * error = nil;
      id<MTLDevice> device = MTLCreateSystemDefaultDevice();
-     id<MTLLibrary> library = [device
-       newLibraryWithSource:@"#include <metal_stdlib>\n"
-         "using namespace metal;"
-         "kernel void test() { simd_sum(0); }"
-       options:nil
-       error:&error
-     ];
-     if (error) {
-       reasonNoMetal = [error localizedDescription];
-     } else {
-       id<MTLFunction> kernel = [library newFunctionWithName:@"test"];
-       id<MTLComputePipelineState> pipeline = [device newComputePipelineStateWithFunction:kernel error:&error];
-       if (pipeline == nil) {
-         reasonNoMetal = [error localizedDescription];
-         NSLog(@"[RNWhisper] ggml-metal is not available, ignoring use_gpu option: %@", reasonNoMetal);
-         cparams.use_gpu = false;
-       }
+
+     // Check ggml-metal availability
+     BOOL supportsGgmlMetal = [device supportsFamily:MTLGPUFamilyApple7];
+     if (@available(iOS 16.0, tvOS 16.0, *)) {
+       supportsGgmlMetal = supportsGgmlMetal && [device supportsFamily:MTLGPUFamilyMetal3];
      }
- #endif // TARGET_OS_SIMULATOR
+     if (!supportsGgmlMetal) {
+       cparams.use_gpu = false;
+       reasonNoMetal = @"Metal is not supported in this device";
+     }
+
+ #if TARGET_OS_SIMULATOR
+     // Use the backend, but no layers because not supported fully on simulator
+     cparams.use_gpu = false;
+     reasonNoMetal = @"Metal is not supported in simulator";
+ #endif
+
+     device = nil;
    }
  #endif // WSP_GGML_USE_METAL
 
package/ios/RNWhisperVadContext.h ADDED
@@ -0,0 +1,29 @@
+ #ifdef __cplusplus
+ #if RNWHISPER_BUILD_FROM_SOURCE
+ #import "whisper.h"
+ #import "rn-whisper.h"
+ #else
+ #import <rnwhisper/whisper.h>
+ #import <rnwhisper/rn-whisper.h>
+ #endif
+ #endif
+
+ #import <Foundation/Foundation.h>
+
+ @interface RNWhisperVadContext : NSObject {
+   int contextId;
+   dispatch_queue_t dQueue;
+   struct whisper_vad_context * vctx;
+   NSString * reasonNoMetal;
+   bool isMetalEnabled;
+ }
+
+ + (instancetype)initWithModelPath:(NSString *)modelPath contextId:(int)contextId noMetal:(BOOL)noMetal nThreads:(NSNumber *)nThreads;
+ - (bool)isMetalEnabled;
+ - (NSString *)reasonNoMetal;
+ - (struct whisper_vad_context *)getVadContext;
+ - (dispatch_queue_t)getDispatchQueue;
+ - (NSArray *)detectSpeech:(float *)samples samplesCount:(int)samplesCount options:(NSDictionary *)options;
+ - (void)invalidate;
+
+ @end
package/ios/RNWhisperVadContext.mm ADDED
@@ -0,0 +1,148 @@
+ #import "RNWhisperVadContext.h"
+ #import "RNWhisperAudioUtils.h"
+ #import <Metal/Metal.h>
+
+ @implementation RNWhisperVadContext
+
+ + (instancetype)initWithModelPath:(NSString *)modelPath contextId:(int)contextId noMetal:(BOOL)noMetal nThreads:(NSNumber *)nThreads {
+   RNWhisperVadContext *context = [[RNWhisperVadContext alloc] init];
+
+   context->contextId = contextId;
+   context->dQueue = dispatch_queue_create("rnwhisper.vad.serial_queue", DISPATCH_QUEUE_SERIAL);
+   NSString *reasonNoMetal = @"";
+
+   // Set up VAD context parameters
+   struct whisper_vad_context_params ctx_params = whisper_vad_default_context_params();
+   ctx_params.use_gpu = !noMetal;
+   if (nThreads != nil) {
+     ctx_params.n_threads = [nThreads intValue];
+   }
+
+ #ifdef WSP_GGML_USE_METAL
+   if (ctx_params.use_gpu) {
+     id<MTLDevice> device = MTLCreateSystemDefaultDevice();
+
+     // Check ggml-metal availability
+     BOOL supportsGgmlMetal = [device supportsFamily:MTLGPUFamilyApple7];
+     if (@available(iOS 16.0, tvOS 16.0, *)) {
+       supportsGgmlMetal = supportsGgmlMetal && [device supportsFamily:MTLGPUFamilyMetal3];
+     }
+     if (!supportsGgmlMetal) {
+       ctx_params.use_gpu = false;
+       reasonNoMetal = @"Metal is not supported in this device";
+     }
+
+ #if TARGET_OS_SIMULATOR
+     // Use the backend, but no layers because not supported fully on simulator
+     ctx_params.use_gpu = false;
+     reasonNoMetal = @"Metal is not supported in simulator";
+ #endif
+
+     device = nil;
+   }
+ #endif // WSP_GGML_USE_METAL
+
+   // Initialize VAD context
+   context->vctx = whisper_vad_init_from_file_with_params([modelPath UTF8String], ctx_params);
+
+   if (context->vctx == NULL) {
+     NSLog(@"Failed to initialize VAD context from model: %@", modelPath);
+     return nil;
+   }
+
+   // Check GPU status
+   context->isMetalEnabled = ctx_params.use_gpu;
+   context->reasonNoMetal = reasonNoMetal;
+
+   return context;
+ }
+
+ - (bool)isMetalEnabled {
+   return isMetalEnabled;
+ }
+
+ - (NSString *)reasonNoMetal {
+   return reasonNoMetal;
+ }
+
+ - (struct whisper_vad_context *)getVadContext {
+   return vctx;
+ }
+
+ - (dispatch_queue_t)getDispatchQueue {
+   return dQueue;
+ }
+
+ - (NSArray *)detectSpeech:(float *)samples samplesCount:(int)samplesCount options:(NSDictionary *)options {
+   if (vctx == NULL) {
+     NSLog(@"VAD context is null");
+     return @[];
+   }
+
+   // Run VAD detection
+   bool speechDetected = whisper_vad_detect_speech(vctx, samples, samplesCount);
+   if (!speechDetected) {
+     return @[];
+   }
+
+   // Get VAD parameters
+   struct whisper_vad_params vad_params = whisper_vad_default_params();
+
+   if ([options objectForKey:@"threshold"]) {
+     vad_params.threshold = [[options objectForKey:@"threshold"] floatValue];
+   }
+   if ([options objectForKey:@"minSpeechDurationMs"]) {
+     vad_params.min_speech_duration_ms = [[options objectForKey:@"minSpeechDurationMs"] intValue];
+   }
+   if ([options objectForKey:@"minSilenceDurationMs"]) {
+     vad_params.min_silence_duration_ms = [[options objectForKey:@"minSilenceDurationMs"] intValue];
+   }
+   if ([options objectForKey:@"maxSpeechDurationS"]) {
+     vad_params.max_speech_duration_s = [[options objectForKey:@"maxSpeechDurationS"] floatValue];
+   }
+   if ([options objectForKey:@"speechPadMs"]) {
+     vad_params.speech_pad_ms = [[options objectForKey:@"speechPadMs"] intValue];
+   }
+   if ([options objectForKey:@"samplesOverlap"]) {
+     vad_params.samples_overlap = [[options objectForKey:@"samplesOverlap"] floatValue];
+   }
+
+   // Get segments from VAD probabilities
+   struct whisper_vad_segments * segments = whisper_vad_segments_from_probs(vctx, vad_params);
+   if (segments == NULL) {
+     return @[];
+   }
+
+   // Convert segments to NSArray
+   NSMutableArray *result = [[NSMutableArray alloc] init];
+   int n_segments = whisper_vad_segments_n_segments(segments);
+
+   for (int i = 0; i < n_segments; i++) {
+     float t0 = whisper_vad_segments_get_segment_t0(segments, i);
+     float t1 = whisper_vad_segments_get_segment_t1(segments, i);
+
+     NSDictionary *segment = @{
+       @"t0": @(t0),
+       @"t1": @(t1)
+     };
+     [result addObject:segment];
+   }
+
+   // Clean up
+   whisper_vad_free_segments(segments);
+
+   return result;
+ }
+
+ - (void)invalidate {
+   if (vctx != NULL) {
+     whisper_vad_free(vctx);
+     vctx = NULL;
+   }
+ }
+
+ - (void)dealloc {
+   [self invalidate];
+ }
+
+ @end
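
The detectSpeech: method only overrides a whisper_vad_params field when the corresponding key is present in the options dictionary, so JavaScript callers can pass any subset of options. A sketch of the full option object implied by the keys above (the values are illustrative, not the library's defaults):

```ts
// Keys map 1:1 to the whisper_vad_params fields set in detectSpeech: above.
const vadOptions = {
  threshold: 0.5,            // -> threshold
  minSpeechDurationMs: 250,  // -> min_speech_duration_ms
  minSilenceDurationMs: 100, // -> min_silence_duration_ms
  maxSpeechDurationS: 30,    // -> max_speech_duration_s
  speechPadMs: 30,           // -> speech_pad_ms
  samplesOverlap: 0.1,       // -> samples_overlap
}
```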
package/jest/mock.js CHANGED
@@ -61,6 +61,25 @@ if (!NativeModules.RNWhisper) {
    releaseContext: jest.fn(() => Promise.resolve()),
    releaseAllContexts: jest.fn(() => Promise.resolve()),
 
+   // VAD methods
+   initVadContext: jest.fn(() => Promise.resolve({
+     contextId: 2,
+     gpu: false,
+     reasonNoGPU: 'Mock VAD context'
+   })),
+   vadDetectSpeech: jest.fn().mockResolvedValue([
+     { t0: 0.5, t1: 2.3 },
+     { t0: 3.1, t1: 5.8 },
+     { t0: 7.2, t1: 9.4 }
+   ]),
+   vadDetectSpeechFile: jest.fn().mockResolvedValue([
+     { t0: 0.5, t1: 2.3 },
+     { t0: 3.1, t1: 5.8 },
+     { t0: 7.2, t1: 9.4 }
+   ]),
+   releaseVadContext: jest.fn(() => Promise.resolve()),
+   releaseAllVadContexts: jest.fn(() => Promise.resolve()),
+
    // iOS AudioSession utils
    getAudioSessionCurrentCategory: jest.fn(() => Promise.resolve({
      category: 'AVAudioSessionCategoryPlayAndRecord',
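
With these entries the mock can stand in for the native module in unit tests. A minimal sketch, assuming the package's jest mock is wired up (for example via jest.mock or moduleNameMapper) the same way as for the existing transcription mocks:

```ts
import { initWhisperVad } from 'whisper.rn'

test('VAD mock returns canned speech segments', async () => {
  const vad = await initWhisperVad({ filePath: 'vad-model.bin' })
  const segments = await vad.detectSpeech('audio.wav')
  expect(segments).toHaveLength(3)
  expect(segments[0]).toEqual({ t0: 0.5, t1: 2.3 })
  await vad.release()
})
```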
package/lib/commonjs/NativeRNWhisper.js.map CHANGED
@@ -1 +1 @@
- {"version":3,"names":["_reactNative","require","_default","TurboModuleRegistry","get","exports","default"],"sourceRoot":"../../src","sources":["NativeRNWhisper.ts"],"mappings":";;;;;;AACA,IAAAA,YAAA,GAAAC,OAAA;AAAkD,IAAAC,QAAA,GA0GnCC,gCAAmB,CAACC,GAAG,CAAO,WAAW,CAAC;AAAAC,OAAA,CAAAC,OAAA,GAAAJ,QAAA"}
+ {"version":3,"names":["_reactNative","require","_default","TurboModuleRegistry","get","exports","default"],"sourceRoot":"../../src","sources":["NativeRNWhisper.ts"],"mappings":";;;;;;AACA,IAAAA,YAAA,GAAAC,OAAA;AAAkD,IAAAC,QAAA,GA0JnCC,gCAAmB,CAACC,GAAG,CAAO,WAAW,CAAC;AAAAC,OAAA,CAAAC,OAAA,GAAAJ,QAAA"}
package/lib/commonjs/index.js CHANGED
@@ -9,10 +9,12 @@ Object.defineProperty(exports, "AudioSessionIos", {
      return _AudioSessionIos.default;
    }
  });
- exports.WhisperContext = void 0;
+ exports.WhisperVadContext = exports.WhisperContext = void 0;
  exports.initWhisper = initWhisper;
+ exports.initWhisperVad = initWhisperVad;
  exports.libVersion = exports.isUseCoreML = exports.isCoreMLAllowFallback = void 0;
  exports.releaseAllWhisper = releaseAllWhisper;
+ exports.releaseAllWhisperVad = releaseAllWhisperVad;
  var _reactNative = require("react-native");
  var _NativeRNWhisper = _interopRequireDefault(require("./NativeRNWhisper"));
  var _AudioSessionIos = _interopRequireDefault(require("./AudioSessionIos"));
@@ -386,4 +388,112 @@ const isUseCoreML = !!useCoreML;
  exports.isUseCoreML = isUseCoreML;
  const isCoreMLAllowFallback = !!coreMLAllowFallback;
  exports.isCoreMLAllowFallback = isCoreMLAllowFallback;
+ class WhisperVadContext {
+   gpu = false;
+   reasonNoGPU = '';
+   constructor(_ref3) {
+     let {
+       contextId,
+       gpu,
+       reasonNoGPU
+     } = _ref3;
+     this.id = contextId;
+     this.gpu = gpu;
+     this.reasonNoGPU = reasonNoGPU;
+   }
+
+   /**
+    * Detect speech segments in audio file (path or base64 encoded wav file)
+    * base64: need add `data:audio/wav;base64,` prefix
+    */
+   async detectSpeech(filePathOrBase64) {
+     let options = arguments.length > 1 && arguments[1] !== undefined ? arguments[1] : {};
+     let path = '';
+     if (typeof filePathOrBase64 === 'number') {
+       try {
+         const source = _reactNative.Image.resolveAssetSource(filePathOrBase64);
+         if (source) path = source.uri;
+       } catch (e) {
+         throw new Error(`Invalid asset: ${filePathOrBase64}`);
+       }
+     } else {
+       if (filePathOrBase64.startsWith('http')) throw new Error('VAD remote file is not supported, please download it first');
+       path = filePathOrBase64;
+     }
+     if (path.startsWith('file://')) path = path.slice(7);
+
+     // Check if this is base64 encoded audio data
+     if (path.startsWith('data:audio/')) {
+       // This is base64 encoded audio data, use the raw data method
+       return _NativeRNWhisper.default.vadDetectSpeech(this.id, path, options);
+     } else {
+       // This is a file path, use the file method
+       return _NativeRNWhisper.default.vadDetectSpeechFile(this.id, path, options);
+     }
+   }
+
+   /**
+    * Detect speech segments in raw audio data (base64 encoded float32 PCM data)
+    */
+   async detectSpeechData(audioData) {
+     let options = arguments.length > 1 && arguments[1] !== undefined ? arguments[1] : {};
+     return _NativeRNWhisper.default.vadDetectSpeech(this.id, audioData, options);
+   }
+   async release() {
+     return _NativeRNWhisper.default.releaseVadContext(this.id);
+   }
+ }
+
+ /**
+  * Initialize a VAD context for voice activity detection
+  * @param options VAD context options
+  * @returns Promise resolving to WhisperVadContext instance
+  */
+ exports.WhisperVadContext = WhisperVadContext;
+ async function initWhisperVad(_ref4) {
+   let {
+     filePath,
+     isBundleAsset,
+     useGpu = true,
+     nThreads
+   } = _ref4;
+   let path = '';
+   if (typeof filePath === 'number') {
+     try {
+       const source = _reactNative.Image.resolveAssetSource(filePath);
+       if (source) {
+         path = source.uri;
+       }
+     } catch (e) {
+       throw new Error(`Invalid asset: ${filePath}`);
+     }
+   } else {
+     if (!isBundleAsset && filePath.startsWith('http')) throw new Error('VAD remote file is not supported, please download it first');
+     path = filePath;
+   }
+   if (path.startsWith('file://')) path = path.slice(7);
+   const {
+     contextId,
+     gpu,
+     reasonNoGPU
+   } = await _NativeRNWhisper.default.initVadContext({
+     filePath: path,
+     isBundleAsset: !!isBundleAsset,
+     useGpu,
+     nThreads
+   });
+   return new WhisperVadContext({
+     contextId,
+     gpu,
+     reasonNoGPU
+   });
+ }
+
+ /**
+  * Release all VAD contexts and free their memory
+  * @returns Promise resolving when all contexts are released
+  */
+ async function releaseAllWhisperVad() {
+   return _NativeRNWhisper.default.releaseAllVadContexts();
+ }
  //# sourceMappingURL=index.js.map
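
The compiled output above corresponds to the new public API: initWhisperVad, WhisperVadContext.detectSpeech / detectSpeechData / release, and releaseAllWhisperVad. A short usage sketch against that API (model and audio paths are placeholders; segment timestamps are whatever the native layer reports):

```ts
import { initWhisperVad, releaseAllWhisperVad } from 'whisper.rn'

async function findSpeechSegments() {
  // Remote URLs are rejected here, so the model must be a local file or bundle asset.
  const vad = await initWhisperVad({
    filePath: '/path/to/vad-model.bin',
    useGpu: true,
  })

  // Accepts a file path or a base64 wav with the `data:audio/wav;base64,` prefix.
  const segments = await vad.detectSpeech('/path/to/recording.wav')
  segments.forEach(({ t0, t1 }) => console.log(`speech ${t0} -> ${t1}`))

  await vad.release()
  await releaseAllWhisperVad() // or release every remaining VAD context at once
}
```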